import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt
from matplotlib.pyplot import show
# Read the training data into a DataFrame; the bare name on the last line
# echoes the frame as the cell output.
df1 = pd.read_csv('D:/training_set.csv')
df1
| Id | MSSubClass | MSZoning | LotFrontage | LotArea | Street | Alley | LotShape | LandContour | Utilities | ... | PoolArea | PoolQC | Fence | MiscFeature | MiscVal | MoSold | YrSold | SaleType | SaleCondition | SalePrice | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 60 | RL | 65.0 | 8450 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | NaN | NaN | NaN | 0 | 2 | 2008 | WD | Normal | 208500 |
| 1 | 2 | 20 | RL | 80.0 | 9600 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | NaN | NaN | NaN | 0 | 5 | 2007 | WD | Normal | 181500 |
| 2 | 3 | 60 | RL | 68.0 | 11250 | Pave | NaN | IR1 | Lvl | AllPub | ... | 0 | NaN | NaN | NaN | 0 | 9 | 2008 | WD | Normal | 223500 |
| 3 | 4 | 70 | RL | 60.0 | 9550 | Pave | NaN | IR1 | Lvl | AllPub | ... | 0 | NaN | NaN | NaN | 0 | 2 | 2006 | WD | Abnorml | 140000 |
| 4 | 5 | 60 | RL | 84.0 | 14260 | Pave | NaN | IR1 | Lvl | AllPub | ... | 0 | NaN | NaN | NaN | 0 | 12 | 2008 | WD | Normal | 250000 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1455 | 1456 | 60 | RL | 62.0 | 7917 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | NaN | NaN | NaN | 0 | 8 | 2007 | WD | Normal | 175000 |
| 1456 | 1457 | 20 | RL | 85.0 | 13175 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | NaN | MnPrv | NaN | 0 | 2 | 2010 | WD | Normal | 210000 |
| 1457 | 1458 | 70 | RL | 66.0 | 9042 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | NaN | GdPrv | Shed | 2500 | 5 | 2010 | WD | Normal | 266500 |
| 1458 | 1459 | 20 | RL | 68.0 | 9717 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | NaN | NaN | NaN | 0 | 4 | 2010 | WD | Normal | 142125 |
| 1459 | 1460 | 20 | RL | 75.0 | 9937 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | NaN | NaN | NaN | 0 | 6 | 2008 | WD | Normal | 147500 |
1460 rows × 81 columns
# Print the column-by-column summary: non-null counts and dtypes.
df1.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1460 entries, 0 to 1459 Data columns (total 81 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Id 1460 non-null int64 1 MSSubClass 1460 non-null int64 2 MSZoning 1460 non-null object 3 LotFrontage 1201 non-null float64 4 LotArea 1460 non-null int64 5 Street 1460 non-null object 6 Alley 91 non-null object 7 LotShape 1460 non-null object 8 LandContour 1460 non-null object 9 Utilities 1460 non-null object 10 LotConfig 1460 non-null object 11 LandSlope 1460 non-null object 12 Neighborhood 1460 non-null object 13 Condition1 1460 non-null object 14 Condition2 1460 non-null object 15 BldgType 1460 non-null object 16 HouseStyle 1460 non-null object 17 OverallQual 1460 non-null int64 18 OverallCond 1460 non-null int64 19 YearBuilt 1460 non-null int64 20 YearRemodAdd 1460 non-null int64 21 RoofStyle 1460 non-null object 22 RoofMatl 1460 non-null object 23 Exterior1st 1460 non-null object 24 Exterior2nd 1460 non-null object 25 MasVnrType 1452 non-null object 26 MasVnrArea 1452 non-null float64 27 ExterQual 1460 non-null object 28 ExterCond 1460 non-null object 29 Foundation 1460 non-null object 30 BsmtQual 1423 non-null object 31 BsmtCond 1423 non-null object 32 BsmtExposure 1422 non-null object 33 BsmtFinType1 1423 non-null object 34 BsmtFinSF1 1460 non-null int64 35 BsmtFinType2 1422 non-null object 36 BsmtFinSF2 1460 non-null int64 37 BsmtUnfSF 1460 non-null int64 38 TotalBsmtSF 1460 non-null int64 39 Heating 1460 non-null object 40 HeatingQC 1460 non-null object 41 CentralAir 1460 non-null object 42 Electrical 1459 non-null object 43 1stFlrSF 1460 non-null int64 44 2ndFlrSF 1460 non-null int64 45 LowQualFinSF 1460 non-null int64 46 GrLivArea 1460 non-null int64 47 BsmtFullBath 1460 non-null int64 48 BsmtHalfBath 1460 non-null int64 49 FullBath 1460 non-null int64 50 HalfBath 1460 non-null int64 51 BedroomAbvGr 1460 non-null int64 52 KitchenAbvGr 1460 non-null int64 53 KitchenQual 1460 non-null 
object 54 TotRmsAbvGrd 1460 non-null int64 55 Functional 1460 non-null object 56 Fireplaces 1460 non-null int64 57 FireplaceQu 770 non-null object 58 GarageType 1379 non-null object 59 GarageYrBlt 1379 non-null float64 60 GarageFinish 1379 non-null object 61 GarageCars 1460 non-null int64 62 GarageArea 1460 non-null int64 63 GarageQual 1379 non-null object 64 GarageCond 1379 non-null object 65 PavedDrive 1460 non-null object 66 WoodDeckSF 1460 non-null int64 67 OpenPorchSF 1460 non-null int64 68 EnclosedPorch 1460 non-null int64 69 3SsnPorch 1460 non-null int64 70 ScreenPorch 1460 non-null int64 71 PoolArea 1460 non-null int64 72 PoolQC 7 non-null object 73 Fence 281 non-null object 74 MiscFeature 54 non-null object 75 MiscVal 1460 non-null int64 76 MoSold 1460 non-null int64 77 YrSold 1460 non-null int64 78 SaleType 1460 non-null object 79 SaleCondition 1460 non-null object 80 SalePrice 1460 non-null int64 dtypes: float64(3), int64(35), object(43) memory usage: 924.0+ KB
# Count how many rows fall into each SaleCondition category.
df1['SaleCondition'].value_counts()
Normal 1198 Partial 125 Abnorml 101 Family 20 Alloca 12 AdjLand 4 Name: SaleCondition, dtype: int64
# (number of rows, number of columns) of the raw frame.
df1.shape
(1460, 81)
# Number of missing values in each column, before any imputation.
df1.isna().sum()
Id 0
MSSubClass 0
MSZoning 0
LotFrontage 259
LotArea 0
...
MoSold 0
YrSold 0
SaleType 0
SaleCondition 0
SalePrice 0
Length: 81, dtype: int64
# Fill the gaps in every column that has missing values: the most frequent
# value for object (categorical) columns, the arithmetic mean for the rest.
# NOTE(review): df1.info() shows some object columns are almost entirely
# missing (e.g. Alley, PoolQC, Fence, MiscFeature); filling those with the
# mode may not be meaningful -- confirm this is intended.
for col in df1.columns:
    series = df1[col]
    if series.isna().sum() == 0:
        # Nothing to impute in this column.
        continue
    if series.dtypes == 'object':
        fill_value = series.mode()[0]
    else:
        fill_value = series.mean()
    df1[col] = series.fillna(fill_value)

# Re-count missing values per column to confirm the imputation.
df1.isna().sum()
Id 0
MSSubClass 0
MSZoning 0
LotFrontage 0
LotArea 0
..
MoSold 0
YrSold 0
SaleType 0
SaleCondition 0
SalePrice 0
Length: 81, dtype: int64
# Separate predictors (X) from the target (Y = SalePrice).
# NOTE(review): Id, LowQualFinSF and MiscVal are excluded by hand; the reason
# is not recorded here -- confirm.
X = df1.drop(columns=['SalePrice', 'Id', 'LowQualFinSF', 'MiscVal'])
Y = df1['SalePrice']
X.shape
(1460, 77)
# Length of the target vector; should match the row count of X.
Y.shape
(1460,)
# Partition the predictor names by dtype: object columns go to `cat`
# (categorical), every other column goes to `con` (continuous).
cat = [name for name in X.columns if X[name].dtypes == 'object']
con = [name for name in X.columns if X[name].dtypes != 'object']
print(cat)
print(con)
['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 'SaleCondition'] ['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MoSold', 'YrSold']
# Explicit literal copies of the categorical (cat) and continuous (con)
# column-name lists; they repeat the values printed by the dtype loop above.
cat=['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood',
'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType',
'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating',
'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish',
'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 'SaleCondition']
con=['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea',
'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'GrLivArea', 'BsmtFullBath',
'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt',
'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea',
'MoSold', 'YrSold']
# Univariate view of every column in df1: a bar chart of category counts for
# object columns, a histogram with a KDE overlay for the others.  Each column
# gets its own figure.
for col in df1.columns:
    if df1[col].dtypes == 'object':
        df1[col].value_counts().plot(kind='bar')
    else:
        sb.histplot(data=df1, x=df1[col], kde=True)
    plt.show()
# Bivariate view against the target: a scatter plot of SalePrice versus the
# column for non-object columns, a box plot of SalePrice per category for
# object columns.  The loop visits every column of df1, including Id and
# SalePrice itself.
for col in df1.columns:
    if df1[col].dtypes != 'object':
        plt.scatter(data=df1, x=df1[col], y='SalePrice')
        plt.xlabel(col)
        plt.ylabel('SalePrice')
        plt.title(f'{col} vs SalePrice')
    else:
        sb.boxplot(data=df1, x=df1[col], y='SalePrice')
    plt.show()
# Pairwise correlation matrix of the numeric columns.  numeric_only is passed
# explicitly because df1 still holds object columns: relying on the implicit
# default emits a FutureWarning, and once that default becomes False the call
# fails on the non-numeric columns.
a = df1.corr(numeric_only=True)
a
C:\Users\Alankar\AppData\Local\Temp\ipykernel_13472\2980587853.py:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning. a=df1.corr()
| Id | MSSubClass | LotFrontage | LotArea | OverallQual | OverallCond | YearBuilt | YearRemodAdd | MasVnrArea | BsmtFinSF1 | ... | WoodDeckSF | OpenPorchSF | EnclosedPorch | 3SsnPorch | ScreenPorch | PoolArea | MiscVal | MoSold | YrSold | SalePrice | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Id | 1.000000 | 0.011156 | -0.009601 | -0.033226 | -0.028365 | 0.012609 | -0.012713 | -0.021998 | -0.050199 | -0.005024 | ... | -0.029643 | -0.000477 | 0.002889 | -0.046635 | 0.001330 | 0.057044 | -0.006242 | 0.021172 | 0.000712 | -0.021917 |
| MSSubClass | 0.011156 | 1.000000 | -0.357056 | -0.139781 | 0.032628 | -0.059316 | 0.027850 | 0.040581 | 0.022895 | -0.069836 | ... | -0.012579 | -0.006100 | -0.012037 | -0.043825 | -0.026030 | 0.008283 | -0.007683 | -0.013585 | -0.021407 | -0.084284 |
| LotFrontage | -0.009601 | -0.357056 | 1.000000 | 0.306795 | 0.234196 | -0.052820 | 0.117598 | 0.082746 | 0.179283 | 0.215828 | ... | 0.077106 | 0.137454 | 0.009790 | 0.062335 | 0.037684 | 0.180868 | 0.001168 | 0.010158 | 0.006768 | 0.334901 |
| LotArea | -0.033226 | -0.139781 | 0.306795 | 1.000000 | 0.105806 | -0.005636 | 0.014228 | 0.013788 | 0.103960 | 0.214103 | ... | 0.171698 | 0.084774 | -0.018340 | 0.020423 | 0.043160 | 0.077672 | 0.038068 | 0.001205 | -0.014261 | 0.263843 |
| OverallQual | -0.028365 | 0.032628 | 0.234196 | 0.105806 | 1.000000 | -0.091932 | 0.572323 | 0.550684 | 0.410238 | 0.239666 | ... | 0.238923 | 0.308819 | -0.113937 | 0.030371 | 0.064886 | 0.065166 | -0.031406 | 0.070815 | -0.027347 | 0.790982 |
| OverallCond | 0.012609 | -0.059316 | -0.052820 | -0.005636 | -0.091932 | 1.000000 | -0.375983 | 0.073741 | -0.127788 | -0.046231 | ... | -0.003334 | -0.032589 | 0.070356 | 0.025504 | 0.054811 | -0.001985 | 0.068777 | -0.003511 | 0.043950 | -0.077856 |
| YearBuilt | -0.012713 | 0.027850 | 0.117598 | 0.014228 | 0.572323 | -0.375983 | 1.000000 | 0.592855 | 0.314745 | 0.249503 | ... | 0.224880 | 0.188686 | -0.387268 | 0.031355 | -0.050364 | 0.004950 | -0.034383 | 0.012398 | -0.013618 | 0.522897 |
| YearRemodAdd | -0.021998 | 0.040581 | 0.082746 | 0.013788 | 0.550684 | 0.073741 | 0.592855 | 1.000000 | 0.179186 | 0.128451 | ... | 0.205726 | 0.226298 | -0.193919 | 0.045286 | -0.038740 | 0.005829 | -0.010286 | 0.021490 | 0.035743 | 0.507101 |
| MasVnrArea | -0.050199 | 0.022895 | 0.179283 | 0.103960 | 0.410238 | -0.127788 | 0.314745 | 0.179186 | 1.000000 | 0.263582 | ... | 0.159349 | 0.124965 | -0.109849 | 0.018795 | 0.061453 | 0.011723 | -0.029815 | -0.005940 | -0.008184 | 0.475241 |
| BsmtFinSF1 | -0.005024 | -0.069836 | 0.215828 | 0.214103 | 0.239666 | -0.046231 | 0.249503 | 0.128451 | 0.263582 | 1.000000 | ... | 0.204306 | 0.111761 | -0.102303 | 0.026451 | 0.062021 | 0.140491 | 0.003571 | -0.015727 | 0.014359 | 0.386420 |
| BsmtFinSF2 | -0.005968 | -0.065649 | 0.043340 | 0.111170 | -0.059119 | 0.040229 | -0.049107 | -0.067759 | -0.072302 | -0.050117 | ... | 0.067898 | 0.003093 | 0.036543 | -0.029993 | 0.088871 | 0.041709 | 0.004940 | -0.015211 | 0.031706 | -0.011378 |
| BsmtUnfSF | -0.007940 | -0.140759 | 0.122156 | -0.002618 | 0.308159 | -0.136841 | 0.149040 | 0.181133 | 0.114184 | -0.495251 | ... | -0.005316 | 0.129005 | -0.002538 | 0.020764 | -0.012579 | -0.035092 | -0.023837 | 0.034888 | -0.041258 | 0.214479 |
| TotalBsmtSF | -0.015415 | -0.238518 | 0.363358 | 0.260833 | 0.537808 | -0.171098 | 0.391452 | 0.291066 | 0.362452 | 0.522396 | ... | 0.232019 | 0.247264 | -0.095478 | 0.037384 | 0.084489 | 0.126053 | -0.018479 | 0.013196 | -0.014969 | 0.613581 |
| 1stFlrSF | 0.010496 | -0.251758 | 0.414266 | 0.299475 | 0.476224 | -0.144203 | 0.281986 | 0.240379 | 0.342160 | 0.445863 | ... | 0.235459 | 0.211671 | -0.065292 | 0.056104 | 0.088758 | 0.131525 | -0.021096 | 0.031372 | -0.013604 | 0.605852 |
| 2ndFlrSF | 0.005590 | 0.307886 | 0.072483 | 0.050986 | 0.295493 | 0.028942 | 0.010308 | 0.140024 | 0.174019 | -0.137079 | ... | 0.092165 | 0.208026 | 0.061989 | -0.024358 | 0.040606 | 0.081487 | 0.016197 | 0.035164 | -0.028700 | 0.319334 |
| LowQualFinSF | -0.044230 | 0.046474 | 0.036849 | 0.004779 | -0.030429 | 0.025494 | -0.183784 | -0.062419 | -0.069068 | -0.064503 | ... | -0.025444 | 0.018251 | 0.061081 | -0.004296 | 0.026799 | 0.062157 | -0.003793 | -0.022174 | -0.028921 | -0.025606 |
| GrLivArea | 0.008273 | 0.074853 | 0.368392 | 0.263116 | 0.593007 | -0.079686 | 0.199010 | 0.287389 | 0.389893 | 0.208171 | ... | 0.247433 | 0.330224 | 0.009113 | 0.020643 | 0.101510 | 0.170205 | -0.002416 | 0.050240 | -0.036526 | 0.708624 |
| BsmtFullBath | 0.002289 | 0.003491 | 0.091481 | 0.158155 | 0.111098 | -0.054942 | 0.187599 | 0.119470 | 0.085055 | 0.649212 | ... | 0.175315 | 0.067341 | -0.049911 | -0.000106 | 0.023148 | 0.067616 | -0.023047 | -0.025361 | 0.067049 | 0.227122 |
| BsmtHalfBath | -0.020155 | -0.002333 | -0.006419 | 0.048046 | -0.040150 | 0.117821 | -0.038162 | -0.012337 | 0.026669 | 0.067418 | ... | 0.040161 | -0.025324 | -0.008555 | 0.035114 | 0.032121 | 0.020025 | -0.007367 | 0.032873 | -0.046524 | -0.016844 |
| FullBath | 0.005587 | 0.131608 | 0.180424 | 0.126031 | 0.550600 | -0.194149 | 0.468271 | 0.439046 | 0.275730 | 0.058543 | ... | 0.187703 | 0.259977 | -0.115093 | 0.035353 | -0.008106 | 0.049604 | -0.014290 | 0.055872 | -0.019669 | 0.560664 |
| HalfBath | 0.006784 | 0.177354 | 0.048258 | 0.014259 | 0.273458 | -0.060769 | 0.242656 | 0.183331 | 0.200802 | 0.004262 | ... | 0.108080 | 0.199740 | -0.095317 | -0.004972 | 0.072426 | 0.022381 | 0.001290 | -0.009050 | -0.010269 | 0.284108 |
| BedroomAbvGr | 0.037719 | -0.023438 | 0.237023 | 0.119690 | 0.101676 | 0.012980 | -0.070651 | -0.040581 | 0.102417 | -0.107355 | ... | 0.046854 | 0.093810 | 0.041570 | -0.024478 | 0.044300 | 0.070703 | 0.007767 | 0.046544 | -0.036014 | 0.168213 |
| KitchenAbvGr | 0.002951 | 0.281721 | -0.005805 | -0.017784 | -0.183882 | -0.087001 | -0.174800 | -0.149598 | -0.037364 | -0.081007 | ... | -0.090130 | -0.070091 | 0.037312 | -0.024600 | -0.051613 | -0.014525 | 0.062341 | 0.026589 | 0.031687 | -0.135907 |
| TotRmsAbvGrd | 0.027239 | 0.040380 | 0.320146 | 0.190015 | 0.427452 | -0.057583 | 0.095589 | 0.191740 | 0.280027 | 0.044316 | ... | 0.165984 | 0.234192 | 0.004151 | -0.006683 | 0.059383 | 0.083757 | 0.024763 | 0.036907 | -0.034516 | 0.533723 |
| Fireplaces | -0.019772 | -0.045569 | 0.235755 | 0.271364 | 0.396765 | -0.023820 | 0.147716 | 0.112581 | 0.247906 | 0.260011 | ... | 0.200019 | 0.169405 | -0.024822 | 0.011257 | 0.184530 | 0.095074 | 0.001409 | 0.046357 | -0.024096 | 0.466929 |
| GarageYrBlt | 0.000070 | 0.080187 | 0.064324 | -0.024812 | 0.518018 | -0.306169 | 0.780555 | 0.618130 | 0.249367 | 0.150338 | ... | 0.220623 | 0.218490 | -0.285882 | 0.023534 | -0.075256 | -0.014499 | -0.031853 | 0.005173 | -0.000987 | 0.470177 |
| GarageCars | 0.016570 | -0.040110 | 0.269729 | 0.154871 | 0.600671 | -0.185758 | 0.537850 | 0.420622 | 0.363778 | 0.224054 | ... | 0.226342 | 0.213569 | -0.151434 | 0.035765 | 0.050494 | 0.020934 | -0.043080 | 0.040522 | -0.039117 | 0.640409 |
| GarageArea | 0.017634 | -0.098672 | 0.323663 | 0.180403 | 0.562022 | -0.151521 | 0.478954 | 0.371600 | 0.372567 | 0.296970 | ... | 0.224666 | 0.241435 | -0.121777 | 0.035087 | 0.051412 | 0.061047 | -0.027400 | 0.027974 | -0.027378 | 0.623431 |
| WoodDeckSF | -0.029643 | -0.012579 | 0.077106 | 0.171698 | 0.238923 | -0.003334 | 0.224880 | 0.205726 | 0.159349 | 0.204306 | ... | 1.000000 | 0.058661 | -0.125989 | -0.032771 | -0.074181 | 0.073378 | -0.009551 | 0.021011 | 0.022270 | 0.324413 |
| OpenPorchSF | -0.000477 | -0.006100 | 0.137454 | 0.084774 | 0.308819 | -0.032589 | 0.188686 | 0.226298 | 0.124965 | 0.111761 | ... | 0.058661 | 1.000000 | -0.093079 | -0.005842 | 0.074304 | 0.060762 | -0.018584 | 0.071255 | -0.057619 | 0.315856 |
| EnclosedPorch | 0.002889 | -0.012037 | 0.009790 | -0.018340 | -0.113937 | 0.070356 | -0.387268 | -0.193919 | -0.109849 | -0.102303 | ... | -0.125989 | -0.093079 | 1.000000 | -0.037305 | -0.082864 | 0.054203 | 0.018361 | -0.028887 | -0.009916 | -0.128578 |
| 3SsnPorch | -0.046635 | -0.043825 | 0.062335 | 0.020423 | 0.030371 | 0.025504 | 0.031355 | 0.045286 | 0.018795 | 0.026451 | ... | -0.032771 | -0.005842 | -0.037305 | 1.000000 | -0.031436 | -0.007992 | 0.000354 | 0.029474 | 0.018645 | 0.044584 |
| ScreenPorch | 0.001330 | -0.026030 | 0.037684 | 0.043160 | 0.064886 | 0.054811 | -0.050364 | -0.038740 | 0.061453 | 0.062021 | ... | -0.074181 | 0.074304 | -0.082864 | -0.031436 | 1.000000 | 0.051307 | 0.031946 | 0.023217 | 0.010694 | 0.111447 |
| PoolArea | 0.057044 | 0.008283 | 0.180868 | 0.077672 | 0.065166 | -0.001985 | 0.004950 | 0.005829 | 0.011723 | 0.140491 | ... | 0.073378 | 0.060762 | 0.054203 | -0.007992 | 0.051307 | 1.000000 | 0.029669 | -0.033737 | -0.059689 | 0.092404 |
| MiscVal | -0.006242 | -0.007683 | 0.001168 | 0.038068 | -0.031406 | 0.068777 | -0.034383 | -0.010286 | -0.029815 | 0.003571 | ... | -0.009551 | -0.018584 | 0.018361 | 0.000354 | 0.031946 | 0.029669 | 1.000000 | -0.006495 | 0.004906 | -0.021190 |
| MoSold | 0.021172 | -0.013585 | 0.010158 | 0.001205 | 0.070815 | -0.003511 | 0.012398 | 0.021490 | -0.005940 | -0.015727 | ... | 0.021011 | 0.071255 | -0.028887 | 0.029474 | 0.023217 | -0.033737 | -0.006495 | 1.000000 | -0.145721 | 0.046432 |
| YrSold | 0.000712 | -0.021407 | 0.006768 | -0.014261 | -0.027347 | 0.043950 | -0.013618 | 0.035743 | -0.008184 | 0.014359 | ... | 0.022270 | -0.057619 | -0.009916 | 0.018645 | 0.010694 | -0.059689 | 0.004906 | -0.145721 | 1.000000 | -0.028923 |
| SalePrice | -0.021917 | -0.084284 | 0.334901 | 0.263843 | 0.790982 | -0.077856 | 0.522897 | 0.507101 | 0.475241 | 0.386420 | ... | 0.324413 | 0.315856 | -0.128578 | 0.044584 | 0.111447 | 0.092404 | -0.021190 | 0.046432 | -0.028923 | 1.000000 |
38 rows × 38 columns
# Draw the correlation matrix `a` as a colour-coded heatmap.
sb.heatmap(a)
<Axes: >
from sklearn.preprocessing import StandardScaler

# Standardise the continuous predictors with StandardScaler and wrap the
# resulting array back into a DataFrame.  The original row labels are carried
# over explicitly (index=X.index): the later outlier removal and the join with
# the dummy-encoded frame are label-based, so X1 must share X's index instead
# of silently getting a fresh 0..n-1 one.
ss = StandardScaler()
X1 = pd.DataFrame(ss.fit_transform(X[con]), columns=con, index=X.index)
X1
| MSSubClass | LotFrontage | LotArea | OverallQual | OverallCond | YearBuilt | YearRemodAdd | MasVnrArea | BsmtFinSF1 | BsmtFinSF2 | ... | GarageCars | GarageArea | WoodDeckSF | OpenPorchSF | EnclosedPorch | 3SsnPorch | ScreenPorch | PoolArea | MoSold | YrSold | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.073375 | -0.229372 | -0.207142 | 0.651479 | -0.517200 | 1.050994 | 0.878668 | 0.511418 | 0.575425 | -0.288653 | ... | 0.311725 | 0.351000 | -0.752176 | 0.216503 | -0.359325 | -0.116339 | -0.270208 | -0.068692 | -1.599111 | 0.138777 |
| 1 | -0.872563 | 0.451936 | -0.091886 | -0.071836 | 2.179628 | 0.156734 | -0.429577 | -0.574410 | 1.171992 | -0.288653 | ... | 0.311725 | -0.060731 | 1.626195 | -0.704483 | -0.359325 | -0.116339 | -0.270208 | -0.068692 | -0.489110 | -0.614439 |
| 2 | 0.073375 | -0.093110 | 0.073480 | 0.651479 | -0.517200 | 0.984752 | 0.830215 | 0.323060 | 0.092907 | -0.288653 | ... | 0.311725 | 0.631726 | -0.752176 | -0.070361 | -0.359325 | -0.116339 | -0.270208 | -0.068692 | 0.990891 | 0.138777 |
| 3 | 0.309859 | -0.456474 | -0.096897 | 0.651479 | -0.517200 | -1.863632 | -0.720298 | -0.574410 | -0.499274 | -0.288653 | ... | 1.650307 | 0.790804 | -0.752176 | -0.176048 | 4.092524 | -0.116339 | -0.270208 | -0.068692 | -1.599111 | -1.367655 |
| 4 | 0.073375 | 0.633618 | 0.375148 | 1.374795 | -0.517200 | 0.951632 | 0.733308 | 1.364570 | 0.463568 | -0.288653 | ... | 1.650307 | 1.698485 | 0.780197 | 0.563760 | -0.359325 | -0.116339 | -0.270208 | -0.068692 | 2.100892 | 0.138777 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1455 | 0.073375 | -0.365633 | -0.260560 | -0.071836 | -0.517200 | 0.918511 | 0.733308 | -0.574410 | -0.973018 | -0.288653 | ... | 0.311725 | -0.060731 | -0.752176 | -0.100558 | -0.359325 | -0.116339 | -0.270208 | -0.068692 | 0.620891 | -0.614439 |
| 1456 | -0.872563 | 0.679039 | 0.266407 | -0.071836 | 0.381743 | 0.222975 | 0.151865 | 0.084843 | 0.759659 | 0.722112 | ... | 0.311725 | 0.126420 | 2.033231 | -0.704483 | -0.359325 | -0.116339 | -0.270208 | -0.068692 | -1.599111 | 1.645210 |
| 1457 | 0.309859 | -0.183951 | -0.147810 | 0.651479 | 3.078570 | -1.002492 | 1.024029 | -0.574410 | -0.369871 | -0.288653 | ... | -1.026858 | -1.033914 | -0.752176 | 0.201405 | -0.359325 | -0.116339 | -0.270208 | -0.068692 | -0.489110 | 1.645210 |
| 1458 | -0.872563 | -0.093110 | -0.080160 | -0.795151 | 0.381743 | -0.704406 | 0.539493 | -0.574410 | -0.865548 | 6.092188 | ... | -1.026858 | -1.090059 | 2.168910 | -0.704483 | 1.473789 | -0.116339 | -0.270208 | -0.068692 | -0.859110 | 1.645210 |
| 1459 | -0.872563 | 0.224833 | -0.058112 | -0.795151 | 0.381743 | -0.207594 | -0.962566 | -0.574410 | 0.847389 | 1.509640 | ... | -1.026858 | -0.921624 | 5.121921 | 0.322190 | -0.359325 | -0.116339 | -0.270208 | -0.068692 | -0.119110 | 0.138777 |
1460 rows × 34 columns
# One-hot encode the categorical predictors.  The dtype is pinned so the
# dummies are always 0/1 integers (as in the output shown below) regardless of
# the pandas version; newer pandas releases default to bool columns, which
# would make the combined design matrix a mix of bool and float when it is
# handed to the OLS fit.
X2 = pd.get_dummies(X[cat], dtype='uint8')
X2
| MSZoning_C (all) | MSZoning_FV | MSZoning_RH | MSZoning_RL | MSZoning_RM | Street_Grvl | Street_Pave | Alley_Grvl | Alley_Pave | LotShape_IR1 | ... | SaleType_ConLw | SaleType_New | SaleType_Oth | SaleType_WD | SaleCondition_Abnorml | SaleCondition_AdjLand | SaleCondition_Alloca | SaleCondition_Family | SaleCondition_Normal | SaleCondition_Partial | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 2 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 1 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 3 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 1 | ... | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
| 4 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 1 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1455 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1456 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1457 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1458 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1459 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
1460 rows × 252 columns
# Gather the row labels whose scaled value is greater than 3 or less than -3
# in any continuous column.  A row that is extreme in several columns is
# appended once per column, so `out` may contain duplicates.
out = []
for col in con:
    beyond_limit = X1[col].abs() > 3
    out.extend(X1.loc[beyond_limit].index)
print(out)
[9, 48, 93, 125, 165, 246, 291, 300, 312, 335, 411, 488, 520, 535, 635, 637, 703, 705, 713, 861, 969, 985, 1030, 1062, 1144, 1186, 1190, 1266, 1393, 1416, 171, 197, 231, 277, 313, 446, 807, 909, 934, 1107, 1127, 1173, 1182, 1211, 1298, 1337, 53, 249, 313, 335, 384, 451, 457, 661, 706, 769, 848, 1298, 1396, 375, 533, 88, 185, 191, 218, 241, 250, 304, 375, 378, 398, 461, 508, 519, 583, 676, 703, 726, 745, 980, 991, 1123, 1149, 1213, 1268, 1327, 1352, 1435, 1457, 304, 630, 747, 1132, 1137, 1349, 37, 58, 70, 105, 115, 161, 169, 178, 224, 297, 349, 403, 477, 517, 523, 654, 691, 718, 755, 763, 798, 808, 825, 898, 981, 1111, 1169, 1228, 1289, 1298, 1373, 1417, 70, 178, 523, 898, 1182, 1298, 24, 52, 113, 116, 153, 166, 233, 253, 260, 263, 271, 273, 313, 322, 355, 414, 440, 446, 470, 493, 542, 548, 577, 586, 599, 666, 697, 764, 785, 828, 842, 854, 888, 918, 923, 924, 1040, 1059, 1077, 1152, 1220, 1253, 1299, 1308, 1320, 1369, 1387, 1418, 1445, 1458, 137, 224, 278, 477, 496, 581, 678, 774, 798, 932, 1267, 224, 332, 440, 496, 523, 691, 1044, 1182, 1298, 1373, 224, 440, 496, 523, 529, 691, 898, 1024, 1044, 1182, 1298, 1373, 304, 691, 1169, 1182, 118, 185, 197, 304, 496, 523, 608, 635, 691, 769, 798, 1169, 1182, 1268, 1298, 1353, 53, 188, 313, 326, 335, 420, 454, 588, 634, 738, 807, 921, 942, 1163, 1270, 1298, 1, 26, 33, 37, 41, 50, 93, 116, 129, 176, 197, 201, 213, 215, 218, 245, 249, 251, 253, 298, 299, 314, 330, 352, 358, 367, 414, 421, 426, 499, 504, 558, 574, 576, 580, 597, 611, 628, 633, 658, 691, 697, 717, 741, 743, 745, 814, 828, 892, 920, 925, 931, 944, 952, 953, 954, 1006, 1029, 1041, 1047, 1052, 1055, 1069, 1072, 1076, 1080, 1103, 1118, 1123, 1149, 1156, 1181, 1213, 1225, 1276, 1287, 1327, 1335, 1350, 1389, 1405, 1415, 188, 298, 597, 624, 628, 921, 1154, 1163, 1230, 1283, 1350, 1450, 53, 144, 189, 291, 330, 570, 634, 635, 843, 897, 1163, 1213, 1270, 1350, 8, 9, 17, 39, 48, 74, 78, 93, 102, 137, 144, 165, 188, 246, 330, 342, 420, 441, 454, 488, 505, 520, 529, 570, 
634, 635, 637, 676, 703, 705, 728, 736, 778, 809, 843, 886, 894, 897, 910, 913, 921, 940, 942, 943, 954, 955, 984, 1003, 1011, 1030, 1062, 1090, 1163, 1186, 1216, 1230, 1232, 1266, 1275, 1283, 1292, 1336, 1350, 1391, 1393, 1412, 1416, 1450, 185, 635, 769, 803, 897, 910, 1031, 1173, 1230, 1298, 1350, 1386, 166, 309, 605, 642, 1298, 93, 653, 178, 581, 664, 825, 1061, 1190, 1298, 53, 64, 166, 169, 335, 343, 357, 480, 661, 769, 828, 848, 893, 961, 974, 1044, 1068, 1210, 1312, 1313, 1423, 1459, 28, 185, 293, 495, 499, 523, 583, 591, 645, 664, 666, 713, 735, 775, 784, 807, 854, 875, 947, 961, 996, 1184, 1193, 1292, 1298, 1328, 1369, 3, 7, 154, 197, 260, 306, 314, 325, 328, 358, 365, 380, 459, 462, 496, 520, 577, 630, 648, 653, 660, 662, 718, 720, 747, 799, 813, 836, 840, 918, 939, 945, 1013, 1030, 1081, 1119, 1139, 1150, 1152, 1185, 1197, 1202, 1248, 1266, 1326, 1360, 1382, 1393, 1419, 1439, 1445, 5, 55, 120, 129, 159, 182, 187, 205, 237, 258, 280, 546, 704, 726, 744, 889, 941, 1080, 1156, 1161, 1181, 1346, 1437, 46, 72, 80, 104, 176, 185, 189, 196, 289, 297, 312, 339, 351, 359, 360, 366, 400, 426, 471, 475, 550, 605, 618, 625, 647, 673, 764, 769, 785, 795, 803, 828, 830, 854, 859, 887, 888, 907, 919, 944, 1037, 1055, 1067, 1070, 1154, 1171, 1184, 1228, 1282, 1293, 1301, 1320, 1328, 1386, 1414, 197, 810, 1170, 1182, 1298, 1386, 1423]
import numpy as np
# Collapse the per-column hits into a sorted array of distinct row labels.
outliers=np.unique(out)
outliers
array([ 1, 3, 5, 7, 8, 9, 17, 24, 26, 28, 33,
37, 39, 41, 46, 48, 50, 52, 53, 55, 58, 64,
70, 72, 74, 78, 80, 88, 93, 102, 104, 105, 113,
115, 116, 118, 120, 125, 129, 137, 144, 153, 154, 159,
161, 165, 166, 169, 171, 176, 178, 182, 185, 187, 188,
189, 191, 196, 197, 201, 205, 213, 215, 218, 224, 231,
233, 237, 241, 245, 246, 249, 250, 251, 253, 258, 260,
263, 271, 273, 277, 278, 280, 289, 291, 293, 297, 298,
299, 300, 304, 306, 309, 312, 313, 314, 322, 325, 326,
328, 330, 332, 335, 339, 342, 343, 349, 351, 352, 355,
357, 358, 359, 360, 365, 366, 367, 375, 378, 380, 384,
398, 400, 403, 411, 414, 420, 421, 426, 440, 441, 446,
451, 454, 457, 459, 461, 462, 470, 471, 475, 477, 480,
488, 493, 495, 496, 499, 504, 505, 508, 517, 519, 520,
523, 529, 533, 535, 542, 546, 548, 550, 558, 570, 574,
576, 577, 580, 581, 583, 586, 588, 591, 597, 599, 605,
608, 611, 618, 624, 625, 628, 630, 633, 634, 635, 637,
642, 645, 647, 648, 653, 654, 658, 660, 661, 662, 664,
666, 673, 676, 678, 691, 697, 703, 704, 705, 706, 713,
717, 718, 720, 726, 728, 735, 736, 738, 741, 743, 744,
745, 747, 755, 763, 764, 769, 774, 775, 778, 784, 785,
795, 798, 799, 803, 807, 808, 809, 810, 813, 814, 825,
828, 830, 836, 840, 842, 843, 848, 854, 859, 861, 875,
886, 887, 888, 889, 892, 893, 894, 897, 898, 907, 909,
910, 913, 918, 919, 920, 921, 923, 924, 925, 931, 932,
934, 939, 940, 941, 942, 943, 944, 945, 947, 952, 953,
954, 955, 961, 969, 974, 980, 981, 984, 985, 991, 996,
1003, 1006, 1011, 1013, 1024, 1029, 1030, 1031, 1037, 1040, 1041,
1044, 1047, 1052, 1055, 1059, 1061, 1062, 1067, 1068, 1069, 1070,
1072, 1076, 1077, 1080, 1081, 1090, 1103, 1107, 1111, 1118, 1119,
1123, 1127, 1132, 1137, 1139, 1144, 1149, 1150, 1152, 1154, 1156,
1161, 1163, 1169, 1170, 1171, 1173, 1181, 1182, 1184, 1185, 1186,
1190, 1193, 1197, 1202, 1210, 1211, 1213, 1216, 1220, 1225, 1228,
1230, 1232, 1248, 1253, 1266, 1267, 1268, 1270, 1275, 1276, 1282,
1283, 1287, 1289, 1292, 1293, 1298, 1299, 1301, 1308, 1312, 1313,
1320, 1326, 1327, 1328, 1335, 1336, 1337, 1346, 1349, 1350, 1352,
1353, 1360, 1369, 1373, 1382, 1386, 1387, 1389, 1391, 1393, 1396,
1405, 1412, 1414, 1415, 1416, 1417, 1418, 1419, 1423, 1435, 1437,
1439, 1445, 1450, 1457, 1458, 1459])
# Remove the flagged rows, in place, from the scaled numeric frame, the dummy
# frame and the target so that all three keep the same set of row labels.
# NOTE(review): going by the shapes printed before and after, this discards
# 424 of the 1460 rows -- confirm that such an aggressive cut is intended.
for frame in (X1, X2, Y):
    frame.drop(index=outliers, axis=0, inplace=True)
X1.shape
(1036, 34)
# Shape of the dummy-encoded frame after the outlier rows were removed.
X2.shape
(1036, 252)
# Length of the target after the outlier rows were removed.
Y.shape
(1036,)
# Combine the scaled numeric columns and the dummy columns side by side,
# matching rows on their index labels.
Xnew=X1.join(X2)
Xnew.shape
(1036, 286)
# Renumber the remaining rows 0..n-1 after the outlier removal.  The length is
# taken from the data itself rather than a hard-coded row count, so this keeps
# working if the input file or the outlier rule changes.
Xnew = Xnew.reset_index(drop=True)
Y = Y.reset_index(drop=True)
Xnew.head()
| MSSubClass | LotFrontage | LotArea | OverallQual | OverallCond | YearBuilt | YearRemodAdd | MasVnrArea | BsmtFinSF1 | BsmtFinSF2 | ... | SaleType_ConLw | SaleType_New | SaleType_Oth | SaleType_WD | SaleCondition_Abnorml | SaleCondition_AdjLand | SaleCondition_Alloca | SaleCondition_Family | SaleCondition_Normal | SaleCondition_Partial | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.073375 | -0.229372 | -0.207142 | 0.651479 | -0.5172 | 1.050994 | 0.878668 | 0.511418 | 0.575425 | -0.288653 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1 | 0.073375 | -0.093110 | 0.073480 | 0.651479 | -0.5172 | 0.984752 | 0.830215 | 0.323060 | 0.092907 | -0.288653 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 2 | 0.073375 | 0.633618 | 0.375148 | 1.374795 | -0.5172 | 0.951632 | 0.733308 | 1.364570 | 0.463568 | -0.288653 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 3 | -0.872563 | 0.224833 | -0.043379 | 1.374795 | -0.5172 | 1.084115 | 0.975575 | 0.456019 | 2.029558 | -0.288653 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 4 | -0.872563 | -0.002269 | 0.068469 | -0.795151 | -0.5172 | -0.207594 | -0.962566 | -0.574410 | 1.014077 | -0.288653 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
5 rows × 286 columns
# Column labels of the combined design matrix.
Xnew.columns
Index(['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond',
'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2',
...
'SaleType_ConLw', 'SaleType_New', 'SaleType_Oth', 'SaleType_WD',
'SaleCondition_Abnorml', 'SaleCondition_AdjLand',
'SaleCondition_Alloca', 'SaleCondition_Family', 'SaleCondition_Normal',
'SaleCondition_Partial'],
dtype='object', length=286)
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing.  The seed is the same one (54) that
# every later re-split in the feature-elimination steps uses, so the first
# adjusted R^2 and the subsequent ones are computed on the same row partition
# and differ only because of the dropped column.
xtrain, xtest, ytrain, ytest = train_test_split(
    Xnew, Y, test_size=0.2, random_state=54
)
xtrain.shape
(828, 286)
# Column labels of the training split (same columns as Xnew).
xtrain.columns
Index(['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond',
'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2',
...
'SaleType_ConLw', 'SaleType_New', 'SaleType_Oth', 'SaleType_WD',
'SaleCondition_Abnorml', 'SaleCondition_AdjLand',
'SaleCondition_Alloca', 'SaleCondition_Family', 'SaleCondition_Normal',
'SaleCondition_Partial'],
dtype='object', length=286)
# Length of the training target; should match the row count of xtrain.
ytrain.shape
(828,)
from statsmodels.api import OLS, add_constant

# Fit ordinary least squares on the training partition (add_constant supplies
# the intercept column) and show the adjusted R^2 rounded to 4 decimals.
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
rsq
0.9279
# Rank the coefficient p-values in ascending order (NaN entries are placed last).
model.pvalues.sort_values()
Neighborhood_StoneBr 9.279455e-12
BsmtExposure_Gd 5.010083e-09
OverallQual 7.418255e-09
BsmtFinSF1 1.349800e-08
TotalBsmtSF 2.484970e-07
...
GarageType_2Types NaN
GarageQual_Po NaN
PoolQC_Ex NaN
PoolQC_Fa NaN
MiscFeature_TenC NaN
Length: 286, dtype: float64
# Backward elimination, one step: drop the least significant feature, refit OLS
# on a fresh 80/20 split, then pick the next candidate for removal.
# NaN p-values sort last, so columns with an undefined p-value go first.
# The intercept is excluded from the candidates: 'const' exists only in Xconst,
# so selecting it would make Xnew.drop(...) raise KeyError.
col_to_drop = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
Xnew = Xnew.drop(labels=col_to_drop, axis=1)
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
col_to_drop = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9332 column to drop SaleType_CWD
# Backward-elimination step: remove the feature chosen by the previous fit,
# refit OLS on a fresh 80/20 split, and pick the next candidate.
Xnew = Xnew.drop(labels=col_to_drop, axis=1)
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Exclude the intercept: 'const' exists only in Xconst, so selecting it would
# make the next Xnew.drop(...) raise KeyError. NaN p-values sort last.
col_to_drop = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9332 column to drop PoolQC_Fa
# Backward-elimination step: remove the feature chosen by the previous fit,
# refit OLS on a fresh 80/20 split, and pick the next candidate.
Xnew = Xnew.drop(labels=col_to_drop, axis=1)
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Exclude the intercept: 'const' exists only in Xconst, so selecting it would
# make the next Xnew.drop(...) raise KeyError. NaN p-values sort last.
col_to_drop = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9332 column to drop PoolQC_Ex
# Backward-elimination step: remove the feature chosen by the previous fit,
# refit OLS on a fresh 80/20 split, and pick the next candidate.
Xnew = Xnew.drop(labels=col_to_drop, axis=1)
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Exclude the intercept: 'const' exists only in Xconst, so selecting it would
# make the next Xnew.drop(...) raise KeyError. NaN p-values sort last.
col_to_drop = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9332 column to drop GarageQual_Po
# Backward-elimination step: remove the feature chosen by the previous fit,
# refit OLS on a fresh 80/20 split, and pick the next candidate.
Xnew = Xnew.drop(labels=col_to_drop, axis=1)
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Exclude the intercept: 'const' exists only in Xconst, so selecting it would
# make the next Xnew.drop(...) raise KeyError. NaN p-values sort last.
col_to_drop = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9332 column to drop GarageType_2Types
# Backward-elimination step: remove the feature chosen by the previous fit,
# refit OLS on a fresh 80/20 split, and pick the next candidate.
Xnew = Xnew.drop(labels=col_to_drop, axis=1)
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Exclude the intercept: 'const' exists only in Xconst, so selecting it would
# make the next Xnew.drop(...) raise KeyError. NaN p-values sort last.
col_to_drop = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9332 column to drop Functional_Sev
# Backward-elimination step: remove the feature chosen by the previous fit,
# refit OLS on a fresh 80/20 split, and pick the next candidate.
Xnew = Xnew.drop(labels=col_to_drop, axis=1)
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Exclude the intercept: 'const' exists only in Xconst, so selecting it would
# make the next Xnew.drop(...) raise KeyError. NaN p-values sort last.
col_to_drop = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9332 column to drop Electrical_Mix
# Backward-elimination step: remove the feature chosen by the previous fit,
# refit OLS on a fresh 80/20 split, and pick the next candidate.
Xnew = Xnew.drop(labels=col_to_drop, axis=1)
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Exclude the intercept: 'const' exists only in Xconst, so selecting it would
# make the next Xnew.drop(...) raise KeyError. NaN p-values sort last.
col_to_drop = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9332 column to drop HeatingQC_Po
# Backward-elimination step: remove the feature chosen by the previous fit,
# refit OLS on a fresh 80/20 split, and pick the next candidate.
Xnew = Xnew.drop(labels=col_to_drop, axis=1)
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Exclude the intercept: 'const' exists only in Xconst, so selecting it would
# make the next Xnew.drop(...) raise KeyError. NaN p-values sort last.
col_to_drop = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9332 column to drop KitchenQual_Fa
# Backward-elimination step: remove the feature chosen by the previous fit,
# refit OLS on a fresh 80/20 split, and pick the next candidate.
Xnew = Xnew.drop(labels=col_to_drop, axis=1)
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Exclude the intercept: 'const' exists only in Xconst, so selecting it would
# make the next Xnew.drop(...) raise KeyError. NaN p-values sort last.
col_to_drop = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9332 column to drop HouseStyle_1.5Fin
# Backward-elimination step: remove the feature chosen by the previous fit,
# refit OLS on a fresh 80/20 split, and pick the next candidate.
Xnew = Xnew.drop(labels=col_to_drop, axis=1)
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Exclude the intercept: 'const' exists only in Xconst, so selecting it would
# make the next Xnew.drop(...) raise KeyError. NaN p-values sort last.
col_to_drop = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9332 column to drop RoofStyle_Shed
# Backward-elimination step: remove the feature chosen by the previous fit,
# refit OLS on a fresh 80/20 split, and pick the next candidate.
Xnew = Xnew.drop(labels=col_to_drop, axis=1)
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Exclude the intercept: 'const' exists only in Xconst, so selecting it would
# make the next Xnew.drop(...) raise KeyError. NaN p-values sort last.
col_to_drop = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9332 column to drop MiscFeature_Gar2
# Backward-elimination step: remove the feature chosen by the previous fit,
# refit OLS on a fresh 80/20 split, and pick the next candidate.
Xnew = Xnew.drop(labels=col_to_drop, axis=1)
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Exclude the intercept: 'const' exists only in Xconst, so selecting it would
# make the next Xnew.drop(...) raise KeyError. NaN p-values sort last.
col_to_drop = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9332 column to drop MiscFeature_Shed
# Backward-elimination step: remove the feature chosen by the previous fit,
# refit OLS on a fresh 80/20 split, and pick the next candidate.
Xnew = Xnew.drop(labels=col_to_drop, axis=1)
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Exclude the intercept: 'const' exists only in Xconst, so selecting it would
# make the next Xnew.drop(...) raise KeyError. NaN p-values sort last.
col_to_drop = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9333 column to drop BsmtFinType1_BLQ
# Backward-elimination step: remove the feature chosen by the previous fit,
# refit OLS on a fresh 80/20 split, and pick the next candidate.
Xnew = Xnew.drop(labels=col_to_drop, axis=1)
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Exclude the intercept: 'const' exists only in Xconst, so selecting it would
# make the next Xnew.drop(...) raise KeyError. NaN p-values sort last.
col_to_drop = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9333 column to drop BsmtExposure_Av
# Backward-elimination step: remove the feature chosen by the previous fit,
# refit OLS on a fresh 80/20 split, and pick the next candidate.
Xnew = Xnew.drop(labels=col_to_drop, axis=1)
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Exclude the intercept: 'const' exists only in Xconst, so selecting it would
# make the next Xnew.drop(...) raise KeyError. NaN p-values sort last.
col_to_drop = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9333 column to drop Condition2_PosN
# Backward-elimination step: remove the feature chosen by the previous fit,
# refit OLS on a fresh 80/20 split, and pick the next candidate.
Xnew = Xnew.drop(labels=col_to_drop, axis=1)
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Exclude the intercept: 'const' exists only in Xconst, so selecting it would
# make the next Xnew.drop(...) raise KeyError. NaN p-values sort last.
col_to_drop = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9333 column to drop LandSlope_Gtl
# Backward-elimination step: remove the feature chosen by the previous fit,
# refit OLS on a fresh 80/20 split, and pick the next candidate.
Xnew = Xnew.drop(labels=col_to_drop, axis=1)
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Exclude the intercept: 'const' exists only in Xconst, so selecting it would
# make the next Xnew.drop(...) raise KeyError. NaN p-values sort last.
col_to_drop = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9333 column to drop RoofMatl_Tar&Grv
# Backward-elimination step: remove the feature chosen by the previous fit,
# refit OLS on a fresh 80/20 split, and pick the next candidate.
Xnew = Xnew.drop(labels=col_to_drop, axis=1)
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Exclude the intercept: 'const' exists only in Xconst, so selecting it would
# make the next Xnew.drop(...) raise KeyError. NaN p-values sort last.
col_to_drop = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9333 column to drop RoofStyle_Flat
# Backward-elimination step: remove the feature chosen by the previous fit,
# refit OLS on a fresh 80/20 split, and pick the next candidate.
Xnew = Xnew.drop(labels=col_to_drop, axis=1)
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Exclude the intercept: 'const' exists only in Xconst, so selecting it would
# make the next Xnew.drop(...) raise KeyError. NaN p-values sort last.
col_to_drop = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9333 column to drop Exterior1st_CemntBd
# Backward-elimination step: remove the feature chosen by the previous fit,
# refit OLS on a fresh 80/20 split, and pick the next candidate.
Xnew = Xnew.drop(labels=col_to_drop, axis=1)
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Exclude the intercept: 'const' exists only in Xconst, so selecting it would
# make a later Xnew.drop(...) raise KeyError. NaN p-values sort last.
col_to_drop = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9401 column to drop LotShape_IR2
# Display the column labels that remain in Xnew after the elimination steps.
Xnew.columns
Index(['MSSubClass', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt',
'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'TotalBsmtSF', '1stFlrSF',
...
'GarageFinish_Unf', 'GarageQual_Fa', 'PavedDrive_N', 'PavedDrive_Y',
'PoolQC_Gd', 'Fence_MnPrv', 'SaleCondition_Abnorml',
'SaleCondition_AdjLand', 'SaleCondition_Alloca',
'SaleCondition_Partial'],
dtype='object', length=105)
# Count the feature columns remaining in Xnew.
len(Xnew.columns)
105
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Baseline: unregularised linear regression fitted on the training partition.
lm = LinearRegression()
model = lm.fit(xtrain, ytrain)

# Error on the rows the model was fitted on.
tr_pred = model.predict(xtrain)
tr_err = mean_squared_error(ytrain, tr_pred)
tr_ab = mean_absolute_error(ytrain, tr_pred)

# Error on the held-out rows.
ts_pred = model.predict(xtest)
ts_err = mean_squared_error(ytest, ts_pred)
ts_ab = mean_absolute_error(ytest, ts_pred)

print('train_err', tr_err)
train_err 245581225.50966182
# Report the mean squared error on the held-out partition.
print('test_err',ts_err)
test_err 466091937.34134614
# Report the mean absolute error on the training partition.
print('train_ab',tr_ab)
train_ab 10878.526570048309
# Report the mean absolute error on the held-out partition.
print('test_ab',ts_ab)
test_ab 14494.485576923076
from sklearn.linear_model import Ridge

# Ridge regression with a fixed, hand-picked penalty of 0.2.
rr = Ridge(alpha=0.2)
model = rr.fit(xtrain, ytrain)

# Error on the rows the model was fitted on.
tr_pred = model.predict(xtrain)
tr_err = mean_squared_error(ytrain, tr_pred)
tr_ab = mean_absolute_error(ytrain, tr_pred)

# Error on the held-out rows.
ts_pred = model.predict(xtest)
ts_err = mean_squared_error(ytest, ts_pred)
ts_ab = mean_absolute_error(ytest, ts_pred)

print('train_err', tr_err)
print('test_err', ts_err)
print('train_ab', tr_ab)
print('test_ab', ts_ab)
train_err 245918989.70185483 test_err 446292659.093027 train_ab 10879.632101245055 test_ab 14234.733770509936
# Candidate ridge penalties: 0.01, 0.02, ..., 5.00 (500 values, step 0.01).
# Each value is computed directly as i / 100, which gives the same two-decimal
# floats as repeatedly adding 0.01 and rounding, without the running accumulator.
w = [i / 100 for i in range(1, 501)]
w
[0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3, 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4, 0.41, 0.42, 0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5, 0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.7, 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.8, 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.0, 1.01, 1.02, 1.03, 1.04, 1.05, 1.06, 1.07, 1.08, 1.09, 1.1, 1.11, 1.12, 1.13, 1.14, 1.15, 1.16, 1.17, 1.18, 1.19, 1.2, 1.21, 1.22, 1.23, 1.24, 1.25, 1.26, 1.27, 1.28, 1.29, 1.3, 1.31, 1.32, 1.33, 1.34, 1.35, 1.36, 1.37, 1.38, 1.39, 1.4, 1.41, 1.42, 1.43, 1.44, 1.45, 1.46, 1.47, 1.48, 1.49, 1.5, 1.51, 1.52, 1.53, 1.54, 1.55, 1.56, 1.57, 1.58, 1.59, 1.6, 1.61, 1.62, 1.63, 1.64, 1.65, 1.66, 1.67, 1.68, 1.69, 1.7, 1.71, 1.72, 1.73, 1.74, 1.75, 1.76, 1.77, 1.78, 1.79, 1.8, 1.81, 1.82, 1.83, 1.84, 1.85, 1.86, 1.87, 1.88, 1.89, 1.9, 1.91, 1.92, 1.93, 1.94, 1.95, 1.96, 1.97, 1.98, 1.99, 2.0, 2.01, 2.02, 2.03, 2.04, 2.05, 2.06, 2.07, 2.08, 2.09, 2.1, 2.11, 2.12, 2.13, 2.14, 2.15, 2.16, 2.17, 2.18, 2.19, 2.2, 2.21, 2.22, 2.23, 2.24, 2.25, 2.26, 2.27, 2.28, 2.29, 2.3, 2.31, 2.32, 2.33, 2.34, 2.35, 2.36, 2.37, 2.38, 2.39, 2.4, 2.41, 2.42, 2.43, 2.44, 2.45, 2.46, 2.47, 2.48, 2.49, 2.5, 2.51, 2.52, 2.53, 2.54, 2.55, 2.56, 2.57, 2.58, 2.59, 2.6, 2.61, 2.62, 2.63, 2.64, 2.65, 2.66, 2.67, 2.68, 2.69, 2.7, 2.71, 2.72, 2.73, 2.74, 2.75, 2.76, 2.77, 2.78, 2.79, 2.8, 2.81, 2.82, 2.83, 2.84, 2.85, 2.86, 2.87, 2.88, 2.89, 2.9, 2.91, 2.92, 2.93, 2.94, 2.95, 2.96, 2.97, 2.98, 2.99, 3.0, 3.01, 3.02, 3.03, 3.04, 3.05, 3.06, 3.07, 3.08, 3.09, 3.1, 3.11, 3.12, 3.13, 3.14, 3.15, 3.16, 3.17, 3.18, 3.19, 3.2, 3.21, 3.22, 3.23, 3.24, 3.25, 3.26, 3.27, 3.28, 3.29, 3.3, 3.31, 3.32, 3.33, 3.34, 3.35, 3.36, 3.37, 3.38, 
3.39, 3.4, 3.41, 3.42, 3.43, 3.44, 3.45, 3.46, 3.47, 3.48, 3.49, 3.5, 3.51, 3.52, 3.53, 3.54, 3.55, 3.56, 3.57, 3.58, 3.59, 3.6, 3.61, 3.62, 3.63, 3.64, 3.65, 3.66, 3.67, 3.68, 3.69, 3.7, 3.71, 3.72, 3.73, 3.74, 3.75, 3.76, 3.77, 3.78, 3.79, 3.8, 3.81, 3.82, 3.83, 3.84, 3.85, 3.86, 3.87, 3.88, 3.89, 3.9, 3.91, 3.92, 3.93, 3.94, 3.95, 3.96, 3.97, 3.98, 3.99, 4.0, 4.01, 4.02, 4.03, 4.04, 4.05, 4.06, 4.07, 4.08, 4.09, 4.1, 4.11, 4.12, 4.13, 4.14, 4.15, 4.16, 4.17, 4.18, 4.19, 4.2, 4.21, 4.22, 4.23, 4.24, 4.25, 4.26, 4.27, 4.28, 4.29, 4.3, 4.31, 4.32, 4.33, 4.34, 4.35, 4.36, 4.37, 4.38, 4.39, 4.4, 4.41, 4.42, 4.43, 4.44, 4.45, 4.46, 4.47, 4.48, 4.49, 4.5, 4.51, 4.52, 4.53, 4.54, 4.55, 4.56, 4.57, 4.58, 4.59, 4.6, 4.61, 4.62, 4.63, 4.64, 4.65, 4.66, 4.67, 4.68, 4.69, 4.7, 4.71, 4.72, 4.73, 4.74, 4.75, 4.76, 4.77, 4.78, 4.79, 4.8, 4.81, 4.82, 4.83, 4.84, 4.85, 4.86, 4.87, 4.88, 4.89, 4.9, 4.91, 4.92, 4.93, 4.94, 4.95, 4.96, 4.97, 4.98, 4.99, 5.0]
from sklearn.model_selection import GridSearchCV

# Search the ridge penalty over the candidate grid `w` with 4-fold
# cross-validation, scored by negated mean absolute error.
# NOTE(review): the search is fitted on all of Xnew/Y, which includes the rows
# held out as xtest -- xtest scores computed afterwards for the chosen alpha
# are therefore optimistic. Confirm whether xtrain/ytrain was intended.
rr = Ridge()
tg = {'alpha': w}
cv = GridSearchCV(rr, tg, scoring='neg_mean_absolute_error', cv=4)
cvmodel = cv.fit(Xnew, Y)
cvmodel.best_params_
{'alpha': 2.36}
# Refit ridge with the penalty chosen by the grid search and score both
# partitions. The alpha is read from the fitted search instead of being typed
# in by hand, so it cannot go stale when the grid or the data change.
best_alpha = cvmodel.best_params_['alpha']
rr = Ridge(alpha=best_alpha)
model = rr.fit(xtrain, ytrain)
tr_pred = model.predict(xtrain)
ts_pred = model.predict(xtest)
tr_err = mean_squared_error(ytrain, tr_pred)
ts_err = mean_squared_error(ytest, ts_pred)
tr_ab = mean_absolute_error(ytrain, tr_pred)
ts_ab = mean_absolute_error(ytest, ts_pred)
print('train_err', tr_err)
print('test_err', ts_err)
print('train_ab', tr_ab)
print('test_ab', ts_ab)
train_err 257484090.6451184 test_err 380859377.1935505 train_ab 11105.321082717273 test_ab 13265.776840227754
# Show the estimator selected by the grid search.
cvmodel.best_estimator_
Ridge(alpha=2.36)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Ridge(alpha=2.36)
# Load the file to be scored and display it.
df2 = pd.read_csv('D:/testing_set.csv')
df2
| Id | MSSubClass | MSZoning | LotFrontage | LotArea | Street | Alley | LotShape | LandContour | Utilities | ... | ScreenPorch | PoolArea | PoolQC | Fence | MiscFeature | MiscVal | MoSold | YrSold | SaleType | SaleCondition | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1461 | 20 | RH | 80.0 | 11622 | Pave | NaN | Reg | Lvl | AllPub | ... | 120 | 0 | NaN | MnPrv | NaN | 0 | 6 | 2010 | WD | Normal |
| 1 | 1462 | 20 | RL | 81.0 | 14267 | Pave | NaN | IR1 | Lvl | AllPub | ... | 0 | 0 | NaN | NaN | Gar2 | 12500 | 6 | 2010 | WD | Normal |
| 2 | 1463 | 60 | RL | 74.0 | 13830 | Pave | NaN | IR1 | Lvl | AllPub | ... | 0 | 0 | NaN | MnPrv | NaN | 0 | 3 | 2010 | WD | Normal |
| 3 | 1464 | 60 | RL | 78.0 | 9978 | Pave | NaN | IR1 | Lvl | AllPub | ... | 0 | 0 | NaN | NaN | NaN | 0 | 6 | 2010 | WD | Normal |
| 4 | 1465 | 120 | RL | 43.0 | 5005 | Pave | NaN | IR1 | HLS | AllPub | ... | 144 | 0 | NaN | NaN | NaN | 0 | 1 | 2010 | WD | Normal |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1454 | 2915 | 160 | RM | 21.0 | 1936 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | 0 | NaN | NaN | NaN | 0 | 6 | 2006 | WD | Normal |
| 1455 | 2916 | 160 | RM | 21.0 | 1894 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | 0 | NaN | NaN | NaN | 0 | 4 | 2006 | WD | Abnorml |
| 1456 | 2917 | 20 | RL | 160.0 | 20000 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | 0 | NaN | NaN | NaN | 0 | 9 | 2006 | WD | Abnorml |
| 1457 | 2918 | 85 | RL | 62.0 | 10441 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | 0 | NaN | MnPrv | Shed | 700 | 7 | 2006 | WD | Normal |
| 1458 | 2919 | 60 | RL | 74.0 | 9627 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | 0 | NaN | NaN | NaN | 0 | 11 | 2006 | WD | Normal |
1459 rows × 80 columns
# Print per-column non-null counts and dtypes for df2.
df2.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1459 entries, 0 to 1458 Data columns (total 80 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Id 1459 non-null int64 1 MSSubClass 1459 non-null int64 2 MSZoning 1455 non-null object 3 LotFrontage 1232 non-null float64 4 LotArea 1459 non-null int64 5 Street 1459 non-null object 6 Alley 107 non-null object 7 LotShape 1459 non-null object 8 LandContour 1459 non-null object 9 Utilities 1457 non-null object 10 LotConfig 1459 non-null object 11 LandSlope 1459 non-null object 12 Neighborhood 1459 non-null object 13 Condition1 1459 non-null object 14 Condition2 1459 non-null object 15 BldgType 1459 non-null object 16 HouseStyle 1459 non-null object 17 OverallQual 1459 non-null int64 18 OverallCond 1459 non-null int64 19 YearBuilt 1459 non-null int64 20 YearRemodAdd 1459 non-null int64 21 RoofStyle 1459 non-null object 22 RoofMatl 1459 non-null object 23 Exterior1st 1458 non-null object 24 Exterior2nd 1458 non-null object 25 MasVnrType 1443 non-null object 26 MasVnrArea 1444 non-null float64 27 ExterQual 1459 non-null object 28 ExterCond 1459 non-null object 29 Foundation 1459 non-null object 30 BsmtQual 1415 non-null object 31 BsmtCond 1414 non-null object 32 BsmtExposure 1415 non-null object 33 BsmtFinType1 1417 non-null object 34 BsmtFinSF1 1458 non-null float64 35 BsmtFinType2 1417 non-null object 36 BsmtFinSF2 1458 non-null float64 37 BsmtUnfSF 1458 non-null float64 38 TotalBsmtSF 1458 non-null float64 39 Heating 1459 non-null object 40 HeatingQC 1459 non-null object 41 CentralAir 1459 non-null object 42 Electrical 1459 non-null object 43 1stFlrSF 1459 non-null int64 44 2ndFlrSF 1459 non-null int64 45 LowQualFinSF 1459 non-null int64 46 GrLivArea 1459 non-null int64 47 BsmtFullBath 1457 non-null float64 48 BsmtHalfBath 1457 non-null float64 49 FullBath 1459 non-null int64 50 HalfBath 1459 non-null int64 51 BedroomAbvGr 1459 non-null int64 52 KitchenAbvGr 1459 non-null int64 53 KitchenQual 1458 
non-null object 54 TotRmsAbvGrd 1459 non-null int64 55 Functional 1457 non-null object 56 Fireplaces 1459 non-null int64 57 FireplaceQu 729 non-null object 58 GarageType 1383 non-null object 59 GarageYrBlt 1381 non-null float64 60 GarageFinish 1381 non-null object 61 GarageCars 1458 non-null float64 62 GarageArea 1458 non-null float64 63 GarageQual 1381 non-null object 64 GarageCond 1381 non-null object 65 PavedDrive 1459 non-null object 66 WoodDeckSF 1459 non-null int64 67 OpenPorchSF 1459 non-null int64 68 EnclosedPorch 1459 non-null int64 69 3SsnPorch 1459 non-null int64 70 ScreenPorch 1459 non-null int64 71 PoolArea 1459 non-null int64 72 PoolQC 3 non-null object 73 Fence 290 non-null object 74 MiscFeature 51 non-null object 75 MiscVal 1459 non-null int64 76 MoSold 1459 non-null int64 77 YrSold 1459 non-null int64 78 SaleType 1458 non-null object 79 SaleCondition 1459 non-null object dtypes: float64(11), int64(26), object(43) memory usage: 912.0+ KB
# Display the (rows, columns) shape of df2.
df2.shape
(1459, 80)
# Count the missing values in each column of df2.
df2.isna().sum()
Id 0
MSSubClass 0
MSZoning 4
LotFrontage 227
LotArea 0
...
MiscVal 0
MoSold 0
YrSold 0
SaleType 1
SaleCondition 0
Length: 80, dtype: int64
# Impute missing values column by column: the most frequent value for text
# (object) columns, the arithmetic mean for every other column.
# NOTE(review): the fill values are computed from df2 itself; confirm whether
# the statistics used for the training frame should be reused here instead.
for col in df2.columns:
    if df2[col].isna().sum() > 0:
        if df2[col].dtypes == 'object':
            fill = df2[col].mode()[0]
        else:
            fill = df2[col].mean()
        df2[col] = df2[col].fillna(fill)
# Re-count the missing values per column after imputation.
df2.isna().sum()
Id 0
MSSubClass 0
MSZoning 0
LotFrontage 0
LotArea 0
..
MiscVal 0
MoSold 0
YrSold 0
SaleType 0
SaleCondition 0
Length: 80, dtype: int64
# Build the test feature frame by removing the identifier and two numeric
# columns (presumably mirroring what was removed from the training frame --
# TODO confirm).
Xts = df2.drop(columns=['Id', 'LowQualFinSF', 'MiscVal'])
Xts.shape
(1459, 77)
# Partition the test columns by dtype: object columns are treated as
# categorical, everything else as continuous. Column order is preserved.
cat_ts = [col for col in Xts.columns if Xts[col].dtypes == 'object']
con_ts = [col for col in Xts.columns if Xts[col].dtypes != 'object']
print(cat_ts)
print(con_ts)
['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 'SaleCondition'] ['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MoSold', 'YrSold']
# Explicit list of the categorical (object-dtype) columns of Xts; it repeats
# the names printed by the dtype partition above.
cat_ts=['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood',
'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd',
'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1',
'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu',
'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType',
'SaleCondition']
# Explicit list of the numeric columns of Xts, in the same order as printed above.
con_ts=['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea',
'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'GrLivArea', 'BsmtFullBath',
'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt',
'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea',
'MoSold', 'YrSold']
from sklearn.preprocessing import StandardScaler

# Standardise the numeric test columns and wrap the result back into a frame
# that carries the original column names.
# NOTE(review): a fresh scaler is fitted on the test rows here, so they are
# centred and scaled with their own statistics. Confirm whether the scaler
# fitted on the training data should be applied with .transform() instead.
ss = StandardScaler()
scaled_values = ss.fit_transform(Xts[con_ts])
X1ts = pd.DataFrame(scaled_values, columns=con_ts)
X1ts
| MSSubClass | LotFrontage | LotArea | OverallQual | OverallCond | YearBuilt | YearRemodAdd | MasVnrArea | BsmtFinSF1 | BsmtFinSF2 | ... | GarageCars | GarageArea | WoodDeckSF | OpenPorchSF | EnclosedPorch | 3SsnPorch | ScreenPorch | PoolArea | MoSold | YrSold | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | -0.874711 | 0.555587 | 0.363929 | -0.751101 | 0.400766 | -0.340945 | -1.072885 | -0.570108 | 0.063295 | 0.517348 | ... | -0.988013 | 1.185945 | 0.366678 | -0.701628 | -0.360738 | -0.088827 | 1.818960 | -0.057227 | -0.038281 | 1.713905 |
| 1 | -0.874711 | 0.604239 | 0.897861 | -0.054877 | 0.400766 | -0.439695 | -1.214908 | 0.041273 | 1.063392 | -0.297903 | ... | -0.988013 | -0.741213 | 2.347867 | -0.178826 | -0.360738 | -0.088827 | -0.301543 | -0.057227 | -0.038281 | 1.713905 |
| 2 | 0.061351 | 0.263676 | 0.809646 | -0.751101 | -0.497418 | 0.844059 | 0.678742 | -0.570108 | 0.773254 | -0.297903 | ... | 0.301623 | 0.042559 | 0.930495 | -0.207871 | -0.360738 | -0.088827 | -0.301543 | -0.057227 | -1.140614 | 1.713905 |
| 3 | 0.061351 | 0.458284 | 0.032064 | -0.054877 | 0.400766 | 0.876976 | 0.678742 | -0.456889 | 0.357829 | -0.297903 | ... | 0.301623 | -0.012766 | 2.089451 | -0.178826 | -0.360738 | -0.088827 | -0.301543 | -0.057227 | -0.038281 | 1.713905 |
| 4 | 1.465443 | -1.244533 | -0.971808 | 1.337571 | -0.497418 | 0.679475 | 0.394694 | -0.570108 | -0.387298 | -0.297903 | ... | 0.301623 | 0.153210 | -0.729632 | 0.489198 | -0.360738 | -0.088827 | 2.243060 | -0.057227 | -1.875504 | 1.713905 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1454 | 2.401505 | -2.314875 | -1.591330 | -1.447325 | 1.298950 | -0.044694 | -0.646813 | -0.570108 | -0.965376 | -0.297903 | ... | -2.277648 | -2.179665 | -0.729632 | -0.701628 | -0.360738 | -0.088827 | -0.301543 | -0.057227 | -0.038281 | -1.359958 |
| 1455 | 2.401505 | -2.314875 | -1.599808 | -1.447325 | -0.497418 | -0.044694 | -0.646813 | -0.570108 | -0.411477 | -0.297903 | ... | -0.988013 | -0.861084 | -0.729632 | -0.353093 | -0.360738 | -0.088827 | -0.301543 | -0.057227 | -0.773170 | -1.359958 |
| 1456 | -0.874711 | 4.447740 | 2.055150 | -0.751101 | 1.298950 | -0.373861 | 0.584059 | -0.570108 | 1.724994 | -0.297903 | ... | 0.301623 | 0.475939 | 2.982161 | -0.701628 | -0.360738 | -0.088827 | -0.301543 | -0.057227 | 1.064053 | -1.359958 |
| 1457 | 0.646389 | -0.320147 | 0.125527 | -0.751101 | -0.497418 | 0.679475 | 0.394694 | -0.570108 | -0.224645 | -0.297903 | ... | -2.277648 | -2.179665 | -0.103169 | -0.236915 | -0.360738 | -0.088827 | -0.301543 | -0.057227 | 0.329164 | -1.359958 |
| 1458 | 0.061351 | 0.263676 | -0.038790 | 0.641347 | -0.497418 | 0.712392 | 0.489377 | -0.037980 | 0.700719 | -0.297903 | ... | 1.591258 | 0.817111 | 0.758218 | -0.004559 | -0.360738 | -0.088827 | -0.301543 | -0.057227 | 1.798942 | -1.359958 |
1459 rows × 34 columns
# One-hot encode the categorical test columns (one indicator column per level).
X2ts = pd.get_dummies(Xts[cat_ts])
X2ts
| MSZoning_C (all) | MSZoning_FV | MSZoning_RH | MSZoning_RL | MSZoning_RM | Street_Grvl | Street_Pave | Alley_Grvl | Alley_Pave | LotShape_IR1 | ... | SaleType_ConLw | SaleType_New | SaleType_Oth | SaleType_WD | SaleCondition_Abnorml | SaleCondition_AdjLand | SaleCondition_Alloca | SaleCondition_Family | SaleCondition_Normal | SaleCondition_Partial | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 1 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 2 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 1 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 3 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 1 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 4 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 1 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1454 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1455 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
| 1456 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
| 1457 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1458 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
1459 rows × 234 columns
# Place the scaled numeric block and the indicator block side by side
# (DataFrame.join aligns the two frames on their index).
Xnew_ts = X1ts.join(X2ts)
Xnew_ts.shape
(1459, 268)
# Give the test feature frame a plain 0..n-1 index; the length is taken from
# the frame itself instead of a hard-coded row count.
Xnew_ts.index = range(len(Xnew_ts))
# Split the test columns into those the training feature set (Xnew) also
# contains and those it does not, preserving test-frame column order.
keep = [col for col in Xnew_ts.columns if col in Xnew.columns]
drop = [col for col in Xnew_ts.columns if col not in Xnew.columns]
print(keep)
['MSSubClass', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'Fireplaces', 'GarageArea', 'WoodDeckSF', '3SsnPorch', 'PoolArea', 'MSZoning_C (all)', 'MSZoning_FV', 'MSZoning_RM', 'Alley_Grvl', 'Alley_Pave', 'LotShape_IR2', 'LandContour_Bnk', 'LandContour_HLS', 'LandContour_Low', 'LandContour_Lvl', 'Utilities_AllPub', 'LotConfig_Corner', 'LotConfig_CulDSac', 'LotConfig_Inside', 'Neighborhood_Blmngtn', 'Neighborhood_BrDale', 'Neighborhood_Crawfor', 'Neighborhood_Edwards', 'Neighborhood_NPkVill', 'Neighborhood_NWAmes', 'Neighborhood_NoRidge', 'Neighborhood_NridgHt', 'Neighborhood_OldTown', 'Neighborhood_StoneBr', 'Condition1_Artery', 'Condition1_PosA', 'Condition1_RRAe', 'BldgType_Duplex', 'BldgType_TwnhsE', 'HouseStyle_1.5Unf', 'HouseStyle_SFoyer', 'HouseStyle_SLvl', 'RoofMatl_CompShg', 'Exterior1st_HdBoard', 'Exterior1st_Plywood', 'Exterior1st_Stucco', 'Exterior1st_VinylSd', 'Exterior1st_Wd Sdng', 'Exterior1st_WdShing', 'Exterior2nd_ImStucc', 'Exterior2nd_Stucco', 'Exterior2nd_VinylSd', 'Exterior2nd_Wd Sdng', 'MasVnrType_Stone', 'ExterQual_Ex', 'ExterCond_TA', 'Foundation_BrkTil', 'Foundation_Slab', 'Foundation_Stone', 'BsmtQual_Ex', 'BsmtQual_Fa', 'BsmtCond_Gd', 'BsmtCond_TA', 'BsmtExposure_Gd', 'BsmtExposure_Mn', 'BsmtExposure_No', 'BsmtFinType1_GLQ', 'BsmtFinType1_LwQ', 'BsmtFinType2_GLQ', 'Heating_Wall', 'HeatingQC_Ex', 'HeatingQC_Fa', 'HeatingQC_Gd', 'HeatingQC_TA', 'CentralAir_N', 'CentralAir_Y', 'Electrical_FuseA', 'KitchenQual_Ex', 'Functional_Mod', 'Functional_Typ', 'FireplaceQu_Po', 'GarageType_Attchd', 'GarageFinish_Fin', 'GarageFinish_RFn', 'GarageFinish_Unf', 'GarageQual_Fa', 'PavedDrive_N', 'PavedDrive_Y', 'PoolQC_Gd', 'Fence_MnPrv', 'SaleCondition_Abnorml', 'SaleCondition_AdjLand', 'SaleCondition_Alloca', 'SaleCondition_Partial']
# List the test columns that are not present in Xnew.
print(drop)
['LotFrontage', 'BsmtFinSF2', 'BsmtUnfSF', 'GrLivArea', 'BsmtFullBath', 'TotRmsAbvGrd', 'GarageYrBlt', 'GarageCars', 'OpenPorchSF', 'EnclosedPorch', 'ScreenPorch', 'MoSold', 'YrSold', 'MSZoning_RH', 'MSZoning_RL', 'Street_Grvl', 'Street_Pave', 'LotShape_IR1', 'LotShape_IR3', 'LotShape_Reg', 'LotConfig_FR2', 'LotConfig_FR3', 'LandSlope_Gtl', 'LandSlope_Mod', 'LandSlope_Sev', 'Neighborhood_Blueste', 'Neighborhood_BrkSide', 'Neighborhood_ClearCr', 'Neighborhood_CollgCr', 'Neighborhood_Gilbert', 'Neighborhood_IDOTRR', 'Neighborhood_MeadowV', 'Neighborhood_Mitchel', 'Neighborhood_NAmes', 'Neighborhood_SWISU', 'Neighborhood_Sawyer', 'Neighborhood_SawyerW', 'Neighborhood_Somerst', 'Neighborhood_Timber', 'Neighborhood_Veenker', 'Condition1_Feedr', 'Condition1_Norm', 'Condition1_PosN', 'Condition1_RRAn', 'Condition1_RRNe', 'Condition1_RRNn', 'Condition2_Artery', 'Condition2_Feedr', 'Condition2_Norm', 'Condition2_PosA', 'Condition2_PosN', 'BldgType_1Fam', 'BldgType_2fmCon', 'BldgType_Twnhs', 'HouseStyle_1.5Fin', 'HouseStyle_1Story', 'HouseStyle_2.5Unf', 'HouseStyle_2Story', 'RoofStyle_Flat', 'RoofStyle_Gable', 'RoofStyle_Gambrel', 'RoofStyle_Hip', 'RoofStyle_Mansard', 'RoofStyle_Shed', 'RoofMatl_Tar&Grv', 'RoofMatl_WdShake', 'RoofMatl_WdShngl', 'Exterior1st_AsbShng', 'Exterior1st_AsphShn', 'Exterior1st_BrkComm', 'Exterior1st_BrkFace', 'Exterior1st_CBlock', 'Exterior1st_CemntBd', 'Exterior1st_MetalSd', 'Exterior2nd_AsbShng', 'Exterior2nd_AsphShn', 'Exterior2nd_Brk Cmn', 'Exterior2nd_BrkFace', 'Exterior2nd_CBlock', 'Exterior2nd_CmentBd', 'Exterior2nd_HdBoard', 'Exterior2nd_MetalSd', 'Exterior2nd_Plywood', 'Exterior2nd_Stone', 'Exterior2nd_Wd Shng', 'MasVnrType_BrkCmn', 'MasVnrType_BrkFace', 'MasVnrType_None', 'ExterQual_Fa', 'ExterQual_Gd', 'ExterQual_TA', 'ExterCond_Ex', 'ExterCond_Fa', 'ExterCond_Gd', 'ExterCond_Po', 'Foundation_CBlock', 'Foundation_PConc', 'Foundation_Wood', 'BsmtQual_Gd', 'BsmtQual_TA', 'BsmtCond_Fa', 'BsmtCond_Po', 'BsmtExposure_Av', 'BsmtFinType1_ALQ', 
'BsmtFinType1_BLQ', 'BsmtFinType1_Rec', 'BsmtFinType1_Unf', 'BsmtFinType2_ALQ', 'BsmtFinType2_BLQ', 'BsmtFinType2_LwQ', 'BsmtFinType2_Rec', 'BsmtFinType2_Unf', 'Heating_GasA', 'Heating_GasW', 'Heating_Grav', 'HeatingQC_Po', 'Electrical_FuseF', 'Electrical_FuseP', 'Electrical_SBrkr', 'KitchenQual_Fa', 'KitchenQual_Gd', 'KitchenQual_TA', 'Functional_Maj1', 'Functional_Maj2', 'Functional_Min1', 'Functional_Min2', 'Functional_Sev', 'FireplaceQu_Ex', 'FireplaceQu_Fa', 'FireplaceQu_Gd', 'FireplaceQu_TA', 'GarageType_2Types', 'GarageType_Basment', 'GarageType_BuiltIn', 'GarageType_CarPort', 'GarageType_Detchd', 'GarageQual_Gd', 'GarageQual_Po', 'GarageQual_TA', 'GarageCond_Ex', 'GarageCond_Fa', 'GarageCond_Gd', 'GarageCond_Po', 'GarageCond_TA', 'PavedDrive_P', 'PoolQC_Ex', 'Fence_GdPrv', 'Fence_GdWo', 'Fence_MnWw', 'MiscFeature_Gar2', 'MiscFeature_Othr', 'MiscFeature_Shed', 'SaleType_COD', 'SaleType_CWD', 'SaleType_Con', 'SaleType_ConLD', 'SaleType_ConLI', 'SaleType_ConLw', 'SaleType_New', 'SaleType_Oth', 'SaleType_WD', 'SaleCondition_Family', 'SaleCondition_Normal']
# Count the columns of the full test feature frame.
len(Xnew_ts.columns)
268
# Align the test features with the training feature set: exactly the columns
# of Xnew, in Xnew's order. A training column that is absent from the test
# frame (e.g. a category level that never occurs in the test file) is created
# and filled with 0, so the frame always matches what the model was fitted on.
Xnewtest = Xnew_ts.reindex(columns=Xnew.columns, fill_value=0)
len(Xnewtest.columns)
105
# Display the test features restricted to the model's columns.
Xnewtest
| MSSubClass | LotArea | OverallQual | OverallCond | YearBuilt | YearRemodAdd | MasVnrArea | BsmtFinSF1 | TotalBsmtSF | 1stFlrSF | ... | GarageFinish_Unf | GarageQual_Fa | PavedDrive_N | PavedDrive_Y | PoolQC_Gd | Fence_MnPrv | SaleCondition_Abnorml | SaleCondition_AdjLand | SaleCondition_Alloca | SaleCondition_Partial | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | -0.874711 | 0.363929 | -0.751101 | 0.400766 | -0.340945 | -1.072885 | -0.570108 | 0.063295 | -0.370808 | -0.654561 | ... | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 |
| 1 | -0.874711 | 0.897861 | -0.054877 | 0.400766 | -0.439695 | -1.214908 | 0.041273 | 1.063392 | 0.639144 | 0.433298 | ... | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 |
| 2 | 0.061351 | 0.809646 | -0.751101 | -0.497418 | 0.844059 | 0.678742 | -0.570108 | 0.773254 | -0.266876 | -0.574165 | ... | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 |
| 3 | 0.061351 | 0.032064 | -0.054877 | 0.400766 | 0.876976 | 0.678742 | -0.456889 | 0.357829 | -0.271395 | -0.579190 | ... | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 |
| 4 | 1.465443 | -0.971808 | 1.337571 | -0.497418 | 0.679475 | 0.394694 | -0.570108 | -0.387298 | 0.528434 | 0.310192 | ... | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1454 | 2.401505 | -1.591330 | -1.447325 | 1.298950 | -0.044694 | -0.646813 | -0.570108 | -0.965376 | -1.129968 | -1.533893 | ... | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 |
| 1455 | 2.401505 | -1.599808 | -1.447325 | -0.497418 | -0.044694 | -0.646813 | -0.570108 | -0.411477 | -1.129968 | -1.533893 | ... | 1 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 |
| 1456 | -0.874711 | 2.055150 | -0.751101 | 1.298950 | -0.373861 | 0.584059 | -0.570108 | 1.724994 | 0.401907 | 0.169499 | ... | 1 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 |
| 1457 | 0.646389 | 0.125527 | -0.751101 | -0.497418 | 0.679475 | 0.394694 | -0.570108 | -0.224645 | -0.303026 | -0.468645 | ... | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 |
| 1458 | 0.061351 | -0.038790 | 0.641347 | -0.497418 | 0.712392 | 0.489377 | -0.037980 | 0.700719 | -0.113237 | -0.403324 | ... | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 |
1459 rows × 105 columns
# Score the selected test features with `cvmodel` (fitted earlier in the
# notebook, not shown here); the result is one predicted value per test row.
pred = cvmodel.predict(Xnewtest)
pred
array([138429.47870899, 181922.3828758 , 202868.94014064, ...,
196078.94599536, 119756.13433631, 222904.36483644])
# Wrap the predictions in a single-key mapping so they can later be turned
# into a one-column DataFrame named 'Output', then dump the mapping.
predn = {'Output': [*pred]}
print(predn)
{'Output': [138429.47870898744, 181922.38287579792, 202868.94014064278, 208819.3966818965, 196351.07400424732, 175876.87481910235, 171840.8437886355, 162109.3324577118, 217398.7646522493, 116522.3678507164, 184373.89166300194, 93185.85826060869, 88185.70322395136, 139578.4249525212, 95311.86454471792, 357983.38620497286, 254734.11956668185, 301680.01665617776, 306996.4268545519, 442605.7640678659, 310106.8426112622, 207064.08962930372, 183312.26713828515, 158973.36515351606, 186585.4961614316, 197269.742467857, 316747.7085790145, 219800.42414222116, 203807.8391175387, 249273.44036505, 198921.34156240843, 99194.6331966687, 203348.54848009616, 263987.5642483526, 280159.96298725903, 244068.69101453363, 177820.6950376915, 166068.76574102143, 156801.67012471927, 148089.50223901714, 181325.88028172636, 142317.15051009858, 226360.7774484935, 244758.88685853942, 231131.56173733255, 199156.61867425474, 268538.13356157125, 222324.81312315533, 162138.21168641376, 147865.7118012545, 146438.7546675175, 172584.8808808055, 153983.680810412, 156370.71819046902, 211972.8453454589, 171247.99467263432, 189413.1907731745, 134272.3638708574, 219143.05676586344, 133762.8981504584, 150684.82850944006, 184990.4025373297, 119535.40914610104, 134221.78617129408, 123590.46332829105, 90909.95470533146, 112838.12401476581, 142486.47204036132, 160429.77426453255, 199608.72893329637, 109169.74998685648, 83396.38548939151, 149091.38353274512, 131271.42093528894, 153579.2350310583, 102235.5055062846, 53860.10836451972, 156364.2687031926, 211481.44623497847, 108719.01640265514, 150695.79490344267, 163970.72051835753, 191834.96757868398, 80596.82878425218, 122254.0733792353, 132474.3920875776, 142815.9219598809, 151159.61927694597, 119972.29955485938, 142182.14881957497, 108491.53768645745, 155245.73725862373, 153695.99851778272, 108133.53792153261, 184588.4332619573, 60794.52097317774, 75710.38502720346, 99777.55530938599, 73821.38663859924, 104414.93058157737, 122147.67563088214, 
122879.69076955957, 130091.49846283082, 160286.84203232167, 146933.03098924612, 250336.60957381915, 86141.62406183637, 230956.49960938218, 144653.23504051173, 137191.67691366508, 90050.26197831883, 153773.02627507545, 273784.75947913836, 138307.13737741177, 222827.71578544058, 252885.28246893443, 188199.7343838973, 158048.77939308298, 138901.86565161432, 196572.94420785643, 165699.46258126915, 128876.3603849011, 285854.72390191414, 224590.8494013497, 132066.41143492772, 54924.44531429879, 98031.2840879041, 169733.77913361817, 102742.91085200463, 127403.70141050215, 93331.99003841166, 123474.910276395, 119167.06204529258, 160413.28760884114, 119777.7643521069, 234823.80298209706, 216262.0992155557, 233171.1319213996, 178949.78977719575, 164849.40758357974, 55978.391700648644, 130950.24707175788, 54562.795991742605, 279590.87709933135, 241219.84107461863, 167972.3578870939, 190898.1410356264, 208236.4310019323, 185953.757963615, 153100.90846783388, 142323.1280684388, 196230.00709265808, 182140.34746853745, 137637.21958531407, 95502.48712696336, 70443.81123380162, 91191.45642384884, 123703.91548119346, 152765.6128455789, 191595.60956891952, 144391.57636859725, 150208.8247324972, 256974.7740015746, 218200.39607646316, 137235.07424724422, 170307.73883919138, 197027.1309126921, 285069.3856962768, 179432.11681466037, 357424.23796386307, 227712.00130327066, 251497.40843438904, 176670.90446118198, 188766.5521869199, 177312.11950813618, 149254.25481734797, 207907.86109170868, 198340.44883383127, 187906.39230946638, 254280.51213443128, 179594.74318961817, 259109.88434878795, 223727.16225495675, 239769.2726016775, 203931.90421287826, 152561.87456128732, 162644.66578562182, 129423.87709931389, 135636.29798734526, 117401.34762603225, 116920.15903023026, 89162.73304037654, 92618.32617502435, 143337.37781415615, 119877.35779827327, 134950.34965314608, 150674.7494388186, 140625.37518365012, 116529.41440578902, 152617.86830511366, 417121.39821140305, 385172.5842433302, 
369909.0447555127, 450225.9141111047, 320232.5893590006, 326459.9779608729, 388466.8444632871, 339572.4655591594, 312381.4255479601, 342699.5869010817, 257941.04521642294, 386944.8625614126, 291702.325638604, 246076.76974353715, 201576.4024107564, 203465.93511894363, 220602.1136633076, 449677.18575770763, 374521.1036355234, 329929.7669416433, 259400.2617493015, 316668.7025029106, 187803.49798851222, 177448.39292743878, 172928.37859446308, 166271.10031468264, 165655.2424729298, 189524.98748213213, 191048.81355442078, 192700.70885532396, 181620.4754535124, 266194.3667169562, 171984.16708441262, 185740.38496330573, 164076.1420879405, 273255.15205503453, 168359.7183318832, 329562.5599378812, 286164.1043581335, 260216.0181755466, 272902.808255751, 233546.66998082536, 220947.55058122354, 261236.13280866997, 242270.52123730152, 401674.38162063854, 227834.4594419568, 203241.59470907465, 256352.25288947692, 228219.56625861162, 277578.6125249957, 246933.15750134355, 279158.2571216811, 225439.88884770684, 217121.3913747641, 177539.8103064155, 171549.95713128897, 139144.876399156, 221260.93425641966, 228780.6451621701, 158549.70562250778, 121765.48270378305, 160772.30677951802, 204364.27733140325, 244622.09709575388, 181507.60050579545, 131515.47953702937, 164719.5330446991, 170821.56121789097, 177715.70686499894, 110148.45606121515, 146745.63572796073, 117681.63536123937, 127837.64254301201, 101536.36580517332, 105730.00842961017, 220997.93252565275, 253623.3825703201, 239534.04243322054, 210188.24489827006, 186168.1103586313, 180727.40397832336, 178094.949758482, 262909.65560435114, 211440.97824693946, 191017.92317171663, 238904.84020179135, 228828.21913386238, 153888.92389728018, 138490.87405815138, 240729.36363899236, 109524.59478181585, 158979.5008028346, 196415.62516401615, 179394.39146236013, 36191.5544841863, 127186.41558924278, 150789.56635909423, 174501.51185627974, 164556.08196090645, 155036.0486238003, 198293.00398122674, 188553.35783443024, 114365.25814924332, 
200635.21921313746, 195910.7028190593, 226913.0329815348, 149523.71805309755, 174874.3886558928, 159956.00981687647, 125816.47398612121, 147887.85265752533, 122901.43753088027, 156823.33950442116, 147866.21487565734, 132551.01893685583, 108276.70414154083, 152519.0461833253, 134488.57793160548, 166513.12812966626, 131578.60639746406, 74295.00247835193, 142345.8016759186, 92466.49326598697, 137689.35299122444, 108464.21543218792, 150135.2893264153, 34835.3729203439, 103994.81516461118, 61481.06645629917, 217850.0834282784, 164803.9516719142, 130967.65279250698, 161482.8072914038, 144930.2280707599, 145033.24190551694, 129687.77799950635, 118202.3236876862, 109027.62243616287, 118977.72590028145, 139289.02101185243, 117899.05567483959, 156460.37246593848, 131481.0553706619, 135617.60592516308, 126060.88076998369, 147568.93494957898, 125393.28292628856, 128506.61495206338, 135302.35611872555, 75602.12349310594, 120470.23521404185, 129229.83410764886, 93586.74865827175, 47351.731221106165, 102604.96219125195, 115752.00423069973, 172941.2924780891, 140882.514546274, 31701.357075515538, 93471.23554235938, 151021.66691965796, 23501.6261781347, 129881.91012802464, 147091.60723464392, 94638.12310307936, 102398.72114126236, 93887.43366667684, 111272.58451057284, 155800.92130720502, 158375.50779510773, 82579.30159071286, 148643.30036670147, 118525.70870549732, 111811.2072560239, 139114.42621050475, 67624.57025434931, 99687.20413949175, 102484.52220771962, 126058.81603687532, 154771.8853074107, 97322.49875895634, 147536.08519549036, 144129.06475435398, 148362.4893635134, 153554.70513798838, 171065.25394083338, 44135.62368854029, 94393.06614495945, 120073.46037596704, 164461.60487729684, 124105.73295635538, 107451.99040944602, 164771.4518003841, 168266.33612182422, 221941.29932386312, 135203.97307443505, 157360.01691137985, 92317.29760187985, 139784.6609753978, 94047.13307780105, 318686.51938344166, 311615.59466416476, 311682.403707733, 351615.96600992826, 337156.1266762558, 
214954.52946712455, 291947.0678362652, 208247.36712714273, 220111.36776792735, 264286.98174149287, 174564.2410134592, 255857.1739524586, 139189.13969231883, 203134.45991623396, 209515.40850391425, 217200.69670411228, 203192.34404969309, 154905.64513047624, 131535.43958445208, 252825.5491755252, 247857.09649351647, 191739.81824268377, 213218.56865698873, 249585.85653587134, 288442.4531463247, 212513.5413251988, 248667.2493959418, 172970.5084899332, 120521.51026439217, 132366.24486422588, 100587.37323212897, 136001.88765997248, 123220.75171928738, 140544.51109811332, 136439.70540813342, 114373.27754895501, 112784.12062673285, 165673.43209732635, 144622.71998222976, 195511.05375352406, 198075.00313155493, 233779.92047490986, 154405.1601048569, 203627.85120323248, 189301.00094262348, 229841.68118760706, 107211.85634021969, 122121.56049959062, 117792.50172657773, 235063.4011223708, 317551.9070066287, 163357.29856745838, 64401.314893981325, 297081.2291386321, 66615.75943047486, 246466.9939984947, 142269.06646419183, 175919.16525024414, 176357.64850029713, 381997.72919627454, 334546.77840783587, 238143.16071765264, 215178.05466502177, 203324.26229819053, 381870.85692429147, 141732.40881624262, 177102.5233167599, 144019.9764898924, 125331.44541698831, 144043.47933495097, 145023.1590732561, 197659.80071695568, 185910.84143835094, 175120.2121262868, 213190.25416544953, 187394.75401502187, 177635.4580228434, 247867.97208849934, 191462.1211834277, 180701.78483748142, 177031.1551170869, 227020.3251234882, 389259.1216015254, 397932.3195811437, 170468.7988067315, 348868.80667091475, 244516.39381287835, 249511.26531479004, 193648.9194404708, 255455.60519561675, 213627.03660684195, 121074.22926144498, 180936.88707861764, 134006.8257902666, 282412.4757696456, 158582.7358724755, 268190.02787302155, 151458.63774401092, 106143.8329781511, 119899.91408446952, 88721.75797738854, 105645.51454723097, 106301.10716924543, 132826.45625321084, 132117.23334481916, 302486.8487801264, 
399414.5839075703, 371497.437854924, 399428.3697664692, 426506.4072786209, 369524.2544639049, 281381.2517227444, 342708.0937967709, 448011.41175724467, 262165.44382581505, 341917.29964322073, 347892.9826849195, 308603.6281021514, 207777.87267458104, 340425.6965231657, 212046.74880517172, 201481.11800643633, 175169.37546906012, 218186.3509201309, 210722.419312992, 202892.91291774088, 169840.8323434422, 193760.28801021416, 210130.573789597, 233115.29710737313, 224395.7190173372, 169919.27192430914, 253392.8504997483, 186397.7401007996, 240940.05710132603, 301121.6532435665, 302672.9027433193, 290886.79225945316, 300010.4089910932, 261205.2427056826, 250530.84777619955, 243158.9496115066, 268512.4980079506, 233073.26936316345, 227632.95496251798, 236170.55191294645, 227797.30509724206, 197048.22069375115, 195149.46044638316, 138997.49867625211, 162579.9784610233, 187033.8003487229, 192375.8576711569, 222625.10646145744, 197998.8126594613, 197851.78383190144, 102801.9774682701, 141552.43043323446, 55532.89889308013, 90459.21654458018, 195130.3101007017, 147015.38334247438, 266545.2401400314, 329347.69409680355, 172256.87279870006, 157450.72245403935, 152767.3314947857, 175970.19294988233, 260226.1347116807, 235015.72628344648, 256991.3354256793, 249415.6446462057, 169584.05604192094, 234639.72689725203, 194492.71545014367, 198842.56978467142, 289055.0537552758, 204751.7719298238, 314419.7860925256, 289548.26966100326, 214796.17156038174, 181602.78928469034, 178922.58070091283, 209345.72983640456, 150508.32041849618, 156870.76951030327, 136293.16561240001, 146504.92168893217, 173247.47211682628, 100420.81827066273, 107620.73795136643, 153885.25138850807, 79292.26718545388, 165266.25002082897, 129548.77356742902, 105964.219428934, 216508.22481100552, 133529.60430031765, 177627.4341555327, 197063.3799043008, 138596.3288644449, 124441.73666750484, 152021.2817511695, 132962.52116339598, 174799.1621309897, 121674.69034674854, 161562.17165703006, 104314.90795296089, 
107523.30877889637, 87459.75485247241, 143642.07976490914, 136365.3230622959, 176406.8842008973, 180816.62885967706, 129360.96031905958, 154889.00862701022, 124933.84173252089, 139594.45766026576, 127748.55504736141, 134640.71464720124, 135791.92066547374, 160798.03288533885, 113146.0743147063, 120741.76932282865, 115001.07769565954, 119775.4059473983, 100795.28406637526, 77510.98608030629, 122900.27975882892, 94267.95603702658, 113552.05531088036, 142404.81577573522, 114633.04392102531, 142179.08032339835, 68102.99770766076, 92572.27365070797, 176031.66365347523, 27463.57029258221, 73033.63364150809, 103310.54646559834, 115582.19002962238, 100732.8862314599, 149340.75729724477, 139164.39085927635, 39243.76439813613, 198730.59021824345, 129615.8889533532, 119118.07265919124, 125587.66275437917, 149587.2679873919, 143747.89259715177, 120013.1027758473, 109642.50660756175, 171930.82739334455, 110963.87287621052, 162750.8132688806, 133948.39056645145, 107609.97588192226, 105342.54708320186, 114639.41938113782, 118442.44963986712, 62222.79293097377, 167998.1428648341, 130989.60972149698, 140895.74273735014, 179717.10892686035, 137757.4491642686, 98118.0492823941, 153063.57447682158, 119262.03565062564, 109509.3742478083, 126486.1173101066, 136746.9492993558, 115607.62748736203, 52201.87598398331, 116821.62586100883, 137858.8252458666, 146582.76324229751, 136322.8155009721, 168382.89295415694, 128039.05173667376, 133246.6061689568, 88925.41270250204, 145529.20113969527, 191785.3673617718, 98222.96487275553, 135951.2790611822, 144839.76150655237, 243162.8104606077, 117683.38232907755, 198395.3116842569, 170893.91763794993, 106059.95625487287, 147123.31083427882, 260179.93037897832, 231963.62661342073, 246178.2059783978, 208353.0422652153, 189787.9411173648, 230664.10504952102, 367239.2617710347, 347600.65093179676, 236149.56289524233, 193292.183129425, 160231.29993884155, 231904.46020181378, 214216.23650854183, 201084.77243550948, 228226.0322822542, 153808.1586128689, 
141078.6163694513, 165719.8103252726, 227744.9029044095, 265943.5408328519, 309101.01997008256, 246686.39166818536, 215670.11938329667, 129144.42535349852, 236933.00588698036, 198995.21356525196, 226419.5727668693, 194290.27690781717, 120118.75600044729, 125377.33144371737, 161339.11710059622, 146593.2048819845, 151993.7429956173, 358838.0317496629, 76517.032887183, 80617.32254347434, 54253.09490099776, 138655.02963129888, 100956.42561142631, 117552.53047518738, 107220.38596332062, 122208.02074078901, 160275.86185012624, 182087.6789336565, 152195.7025786016, 163391.36275496343, 203393.29500881396, 152036.8646648679, 202160.83784313657, 145445.89270529285, 156758.34128081935, 204475.53735983348, 261542.04186195316, 273543.5384706397, 125081.09996256935, 117193.67182988436, 131618.90826507536, 108869.56210962322, 120146.59081687861, 100638.22316088631, 174244.8857480087, 79916.65202789378, 69720.30753985267, 67886.95508109407, 55898.14733281259, 298208.6703841488, 306177.07401722507, 297217.9244940786, 217205.2453326023, 135894.74861290012, 202011.36677992047, 188875.43486453185, 280136.34760651656, 257847.96653787023, 153096.38483109963, 223874.65257338498, 190667.6876030397, 198811.27984629586, 244824.0313451966, 234458.66232419532, 253635.8125503236, 299715.848588376, 186619.7822345003, 120743.40212048411, 166312.55423084713, 147894.70512549544, 130617.69627389612, 131997.79681041258, 103411.8128782112, 85625.6811814553, 146590.27216757604, 124092.77806330427, 132420.00447776684, 124243.91942108264, 133617.9006881236, 178323.25700544543, 202579.07494739915, 158886.6551216539, 175596.10737456585, 193382.5274249122, 171123.14491188087, 223013.47533128934, 153918.4222026643, 179122.8164508945, 151624.71773809462, 212319.66435746517, 241242.08960095036, 381283.9359330546, 479182.178779133, 175550.30013878195, 322135.726587669, 385024.80755039427, 408743.58841538144, 154753.1765461972, 195351.84139102974, 224039.61813635266, 183370.83518623785, 154550.32302384294, 
194121.9821661365, 175622.61361726737, 203606.43504716113, 190159.03319915317, 154876.18079408255, 140537.18062877763, 120550.43694351471, 152853.3451697718, 181131.7700638526, 104155.29057515188, 120169.6194280726, 145182.28053240152, 125150.07825761134, 367479.37180923054, 287746.488212385, 346809.927889341, 434706.9867827207, 342197.8175275042, 396843.4882134631, 444101.27322378085, 386444.016431379, 438404.91998898995, 288714.7520844491, 370281.99445205973, 353450.20650877117, 348906.39119395486, 327976.43826572684, 326894.8363166762, 252743.62623753212, 244397.5157892399, 255316.3415453794, 201519.20104409067, 191381.93448090213, 199114.2332071726, 212106.52193880838, 295570.54769860103, 211232.24368956045, 205191.67607183798, 202784.32132048917, 176511.44130283577, 201129.4698865222, 186614.65495062416, 202765.58273876025, 198239.22918691754, 192067.5990034385, 190319.29506574548, 185632.69943074085, 230166.47659640788, 192160.21136947052, 197726.33273193106, 181078.3460720582, 217389.75559984596, 172348.12683740456, 203850.26533091662, 230660.2666457584, 199766.71811357525, 191781.92206048, 320704.2868973319, 376235.1042279798, 309699.49742120947, 259290.41152462683, 281772.7518251713, 304381.8977116464, 201839.12009329392, 258787.15084686544, 211008.39539800654, 374746.70944746106, 211608.8273732349, 222495.3581613922, 220134.61587973454, 217313.19269569893, 219102.97838416504, 221387.6734914374, 205342.85812112456, 252410.12269906772, 204285.42334143183, 322020.71834501147, 273142.71402869927, 242050.5006842112, 255468.55009805673, 148492.84095313435, 144127.540831265, 153247.53201508522, 185088.12411201172, 199871.7032481337, 126955.86288055188, 106129.60761637986, 149897.0568603264, 271041.548132023, 144582.6125592404, 167731.73106543557, 206694.86956779525, 184368.66813848383, 212028.76094941347, 219532.03624580635, 203459.43213765294, 169692.1242577887, 172970.1212817044, 198974.3657911716, 287640.613894351, 312532.9514125667, 195914.3762130856, 
281323.78795567807, 332130.7281520191, 142751.69792631478, 223885.54199808516, 138815.54645441423, 159389.38432349093, 195370.691085368, 198991.83918230853, 248306.0464151327, 160212.21294003434, 122919.88895824266, 136914.84717253322, 107479.88989534727, 113030.35466647925, 150362.81681228825, 148889.55870275883, 122336.57950094441, 164620.55780703644, 151068.902273063, 216189.71601170622, 140925.09303434315, 228505.15127612196, 122000.4419587577, 49831.46972838558, 51726.280720194554, 125967.20423822112, 123135.27715201993, 152102.01678111852, 155352.65094345808, 149307.7394574734, 81673.29442118164, 140303.72893098334, 153432.18812972703, 120971.80329693266, 190708.6363421952, 114457.96726618739, 170632.12672567568, 135976.18987463886, 168042.10683686356, 142679.35124531042, 139513.73255852883, 127702.88786640005, 124937.62037786061, 134455.27239068787, 123644.98585200337, 142371.91826926603, 111282.59856877156, 122068.34790203131, 139082.9872935013, 242283.5344149361, 127905.68403636554, 126895.71942161187, 180008.5945867083, 107019.52071677288, 136638.89479005075, 103445.5224767554, 148041.81076362054, 142863.58125913926, 144392.46575217193, 165041.1076521867, 120860.54276606701, 108197.70154830665, 116060.3333762302, 90005.98416568317, 130092.85491797802, 96309.43117420154, 92574.28181266665, 135116.81103196042, 130685.24744002696, 80381.92859908138, 144815.14270624175, 188519.44716388237, 131801.94169478005, 110510.55403771275, 171142.66710819415, 125877.88417970043, 209734.63013473916, 93170.93858601025, 120604.926609426, 79460.96280625308, 167291.67860155212, 130063.78225635718, 134323.63248711303, 111884.63069312416, 139366.16034213002, 120831.88531768508, 130785.82756550163, 120532.9512418532, 195634.86221957408, 122234.51044585918, 117114.5049191693, 125088.25444637165, 95522.63666258552, 98849.20461204486, 217303.2852468752, 212840.66083243332, 201959.82301365305, 102337.70324220575, 72458.90719618095, 229334.41411041532, 85986.76557015686, 
98157.09673267146, 156204.41285252135, 117457.21224408185, 160750.56400156303, 117414.4341011461, 96189.59650181589, 101627.2279661617, 119967.52478943515, 118765.31140029573, 149383.61298725553, 189526.66214877364, 156666.7335139268, 158336.83286827325, 142519.75466090444, 98593.33912144127, 177931.53993731964, 154036.42267690238, 161251.45116420844, 92102.4606556542, 221681.12128926453, 174886.13241769228, 115810.92514651269, 94135.71219188282, 105394.54126094944, 148595.0557841782, 151841.2218037859, 90206.55606835558, 207431.9185209943, 216307.0061781865, 244868.53496713468, 275751.0945082165, 247988.74365365997, 213927.8780019593, 214002.66382210463, 171314.41522252123, 219036.89949559502, 214513.7916561329, 236110.29570858437, 163667.07710467442, 176105.2181313797, 154184.86293097638, 152744.5277213424, 249539.5489154773, 226063.9167258073, 189626.77197337357, 224775.2599771276, 122603.68578899269, 138538.7333757053, 152233.90520751057, 158124.4218331909, 122802.09916363955, 119293.18524261766, 147864.70877711312, 121601.82603893887, 259298.93869248097, 228668.15199897886, 198107.1236814824, 230717.5951724775, 283169.6207114856, 233846.67404248423, 251277.9932750804, 188479.29613899067, 188363.2180603618, 180781.28007992427, 179195.3582960936, 160490.85977738246, 126968.68381379744, 121126.56751189144, 142639.9961199078, 122743.67050600956, 138204.7041229959, 178559.71976288885, 163615.66800391593, 701156.7655382574, 139920.41761457335, 130325.83512693169, 64773.5875787275, 83784.22851353334, 104879.20207959713, 103510.81061610617, 120123.57616436663, 176285.34874232954, 153620.7532961455, 168569.85649360297, 158007.0268333912, 153376.14696621406, 168877.64834343333, 193863.6121789325, 151697.31293009344, 177415.46858627052, 120223.57715361883, 218146.8674978149, 239298.24098789808, 117267.20126432595, 219743.8829731237, 156765.34800398265, 237494.6444048073, 288372.7236191959, 140900.31445541338, 72757.5114971696, 147385.30176193223, 75111.89895621543, 
49987.74537277565, 102673.68855849291, 129773.35153576639, 100084.96545037834, 288012.3855110574, 193596.1818191847, 194556.55049295735, 228639.55794094806, 213391.4866108553, 140313.49693144008, 155934.84761981215, 196131.46550434537, 216262.45922283168, 217365.48962944667, 266793.0328757516, 182766.6658382906, 213053.7460785606, 272300.81054144166, 197647.22857608553, 288203.53475885274, 337667.34544186376, 212547.77228127216, 146569.22999881417, 72456.86890470337, 90539.95742908397, 86360.62279414808, 65943.25639778725, 147687.99994954147, 228269.37387424282, 206496.6292042263, 155586.53809486193, 110367.88777396426, 173453.36748014684, 152787.06266925042, 133526.45822447416, 123264.1221267845, 170293.98370109868, 142757.282842152, 217618.95617047424, 275614.43303083733, 199026.4194057056, 195429.89109219378, 179824.2582143548, 189450.18471460792, 241069.1910393563, 286891.65161887393, 291686.5546374761, 168982.60578596013, 175520.82082160306, 447591.2152309249, 490277.29289128125, 380956.84406507167, 446503.76010831754, 408563.9446855726, 316190.40150136413, 415808.21106287494, 153139.39971650363, 177931.7334039456, 206575.623904353, 275651.5343219063, 181065.83709959997, 150794.56073721123, 97907.09413973943, 186209.05372208206, 102234.03058285717, 108887.50669068069, 103071.02389621176, 86636.04373292171, 101013.20254676536, 142409.96201607515, 152220.27269719364, 120362.87476897257, 130187.06449785846, 392944.641746196, 252480.44286834705, 273923.3980316075, 389071.5888963544, 313492.88637132186, 359915.32762092416, 331702.32412109297, 321731.82687504566, 360820.1628575809, 356376.17777633003, 367820.762851815, 279442.5861199783, 285140.5489857327, 340066.6648182466, 283901.90979138564, 181263.81358488294, 188554.09060688567, 188386.62203504532, 270620.54824141314, 188692.2836755921, 193289.11369480158, 197422.3930638046, 198095.20472162712, 183284.27770817344, 189779.08484164235, 204276.47048315132, 255636.44634518528, 267204.8374299147, 275999.50572589075, 
380423.6758033915, 301501.24148259975, 452602.84947904735, 304431.1231723949, 296121.8236430151, 259984.63430160267, 303295.7355121705, 213202.8904333741, 218986.19867425354, 396307.3892069306, 200434.65029805794, 146485.62397018907, 206756.69663424842, 142778.7998840178, 188338.39179845253, 180417.14988865942, 193978.48982279902, 194825.6852878143, 164336.7076379818, 141562.39851011455, 138578.97023013007, 99324.73809727328, 117997.17948803527, 140230.5629389138, 121774.98500952014, 90562.88394836235, 107797.66790901215, 141278.19987579764, 105877.69332095707, 136674.54866879946, 263041.2711104948, 364753.9452677172, 160144.93255317875, 140954.81279999227, 179041.59458977217, 134058.30729588825, 195597.36371704325, 215895.27305576287, 127789.93884246495, 165060.36064649755, 140878.0786306943, 156268.58965156865, 150663.27265455446, 136659.70943285938, 126497.68777666881, 151666.79141522362, 141078.37235014024, 168808.27344893775, 153113.93351077702, 155179.79623101116, 136254.16642568837, 135279.88765275737, 157155.1129358259, 152963.8771981638, 139038.35903351833, 145233.88963091903, 120364.80921359295, 134742.88073758426, 153647.1238001248, 143111.99128496612, 153068.70334245867, 161868.031008867, 154702.54349931082, 158688.48528416659, 140944.0536370623, 150856.15559950314, 147568.23666708192, 105746.93297921607, 126995.21443411933, 122493.6112161004, 137860.51710229804, 200556.0770410179, 64726.7727368589, 241696.2360450248, 142948.37771621704, 84219.37811992427, 57353.22624388742, 67034.23472451956, 160418.35443062882, 132813.97139645627, 140380.26455144593, 143400.84127476212, 184055.81887770974, 152649.09127926241, 296201.64887168823, 158154.3420692948, 93954.6811821168, 120728.93031846578, 137024.46522218076, 137359.87057595223, 99201.099832181, 81693.00376044301, 175944.17260347886, 163690.25507156155, 119520.81529486783, 143875.7389221932, 155239.18174704356, 121989.17107244913, 118946.79108510804, 98850.5588795106, 92790.40987849911, 95999.07109962666, 
87493.02938836692, 99102.50888656545, 136844.12573977243, 52574.69934080941, 130906.1218818655, 67144.0278670701, 165152.7730991534, 96560.74122904621, 113967.8847940906, 40717.92802351815, 165765.99242291966, 89149.95801904274, 114126.15598868838, 93534.0918141781, 275530.880271903, 112708.26496230817, 116428.57366502905, 68327.55633655647, 112605.25803559362, 134289.95271062886, 181125.3051434361, 138769.40274840294, 115151.52672857967, 62152.14042606621, 163386.22499346145, 155251.83067787587, 130452.05153303896, 118362.85548110382, 157160.9263845427, 172518.5547594749, 156621.75839719592, 158570.29887832896, 104345.51639109559, 221670.21729154073, 143674.21145468153, 137631.3487563161, 152937.59781549167, 144436.3758941751, 94380.75874569452, 199131.63507841743, 368665.46495251846, 184445.4121052117, 151526.43379804393, 142121.95328317373, 142227.29571719017, 242884.85485821276, 188382.83846184605, 232323.82478164864, 184499.3906095124, 258128.22949304234, 312613.1245550418, 237339.41209869582, 227312.75326894762, 194082.4344056901, 161756.271253581, 147136.89074091284, 184850.9661957696, 206057.68195981332, 210776.79437763547, 231461.8359343781, 169443.14902549903, 167979.0185577455, 122440.17184778312, 217274.05192358274, 216864.46540315784, 218624.72510464126, 203032.82814575356, 267769.74156917783, 232222.0627402041, 226904.67722060706, 235295.7894917981, 132727.1746321649, 202767.53885580567, 204076.25563043775, 191771.50314169656, 208078.8153567196, 96619.45787562853, 135642.34748469343, 140844.10652705166, 193283.0350311599, 140676.97937635073, 253196.85821965866, 144917.62520178815, 145027.37225613967, 90408.87800190772, 103019.68497988794, 107073.67646613337, 142670.1873272079, 61616.69550646971, 30324.36887887695, 94542.29430108253, 152105.29938538608, 107052.80794320317, 169481.1996268893, 154442.9482259975, 180116.88985437938, 146656.48442787927, 108123.59999049321, 159762.76339561722, 202218.47596970882, 205601.1465946383, 203496.41609014457, 
191163.07596724044, 257864.7360583227, 119619.75644580805, 147957.45907318874, 36629.56996690984, 83717.46209540284, 165641.93072052614, 30276.621805440373, 46220.80783561109, 50552.47984757008, 337400.36010115047, 294457.09440690547, 245582.07162181917, 151678.98120989557, 214351.10805479388, 161987.44746525842, 243144.13072271284, 200616.0085992656, 313780.3568763789, 330191.76543508546, 88763.81397332935, 203758.73645954957, 112259.60771355737, 122385.44742968076, 159279.77436937412, 66700.98302041357, 78203.45085746772, 153033.48208572622, 77092.49729270492, 64136.51587698409, 79205.20192973531, 81236.38607881726, 196078.94599535665, 119756.1343363133, 222904.36483643777]}
# Count the predictions themselves: len(predn) would only count the dict's
# keys (always 1, for 'Output'), which says nothing about how many rows
# were scored.
len(predn['Output'])
1459
# Pull the 'Id' column out of `df2` so it can be paired with the predictions.
ID = df2.loc[:, 'Id']
ID
0 1461
1 1462
2 1463
3 1464
4 1465
...
1454 2915
1455 2916
1456 2917
1457 2918
1458 2919
Name: Id, Length: 1459, dtype: int64
# Promote the Id Series to a one-column DataFrame (the column keeps the name 'Id').
df_new = ID.to_frame()
df_new
| | Id |
|---|---|
| 0 | 1461 |
| 1 | 1462 |
| 2 | 1463 |
| 3 | 1464 |
| 4 | 1465 |
| ... | ... |
| 1454 | 2915 |
| 1455 | 2916 |
| 1456 | 2917 |
| 1457 | 2918 |
| 1458 | 2919 |
1459 rows × 1 columns
# Build a one-column DataFrame ('Output') from the predictions mapping;
# it receives a fresh 0..n-1 index.
df_news = pd.DataFrame(data=predn)
df_news
| | Output |
|---|---|
| 0 | 138429.478709 |
| 1 | 181922.382876 |
| 2 | 202868.940141 |
| 3 | 208819.396682 |
| 4 | 196351.074004 |
| ... | ... |
| 1454 | 79205.201930 |
| 1455 | 81236.386079 |
| 1456 | 196078.945995 |
| 1457 | 119756.134336 |
| 1458 | 222904.364836 |
1459 rows × 1 columns
# Pair each Id with its prediction by row position.
# DataFrame.join aligns on index labels: df_new carries whatever index df2
# had, while df_news always has a fresh 0..n-1 index. Resetting df_new's
# index first guarantees row i of the ids meets row i of the predictions
# even if df2 was filtered or re-indexed upstream (otherwise rows would be
# mismatched or filled with NaN). With the default index the result is
# unchanged.
df_final = df_new.reset_index(drop=True).join(df_news)
df_final
| | Id | Output |
|---|---|---|
| 0 | 1461 | 138429.478709 |
| 1 | 1462 | 181922.382876 |
| 2 | 1463 | 202868.940141 |
| 3 | 1464 | 208819.396682 |
| 4 | 1465 | 196351.074004 |
| ... | ... | ... |
| 1454 | 2915 | 79205.201930 |
| 1455 | 2916 | 81236.386079 |
| 1456 | 2917 | 196078.945995 |
| 1457 | 2918 | 119756.134336 |
| 1458 | 2919 | 222904.364836 |
1459 rows × 2 columns